
Example 66 with BinaryDocValuesField

Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: the class TestBinaryDocValuesUpdates, method testMultipleBinaryDocValues.

public void testMultipleBinaryDocValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    // prevent merges
    conf.setMaxBufferedDocs(10);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (int i = 0; i < 2; i++) {
        Document doc = new Document();
        doc.add(new StringField("dvUpdateKey", "dv", Store.NO));
        doc.add(new BinaryDocValuesField("bdv1", toBytes(i)));
        doc.add(new BinaryDocValuesField("bdv2", toBytes(i)));
        writer.addDocument(doc);
    }
    writer.commit();
    // update all docs' bdv1 field
    writer.updateBinaryDocValue(new Term("dvUpdateKey", "dv"), "bdv1", toBytes(17L));
    writer.close();
    final DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader r = reader.leaves().get(0).reader();
    BinaryDocValues bdv1 = r.getBinaryDocValues("bdv1");
    BinaryDocValues bdv2 = r.getBinaryDocValues("bdv2");
    for (int i = 0; i < r.maxDoc(); i++) {
        assertEquals(i, bdv1.nextDoc());
        assertEquals(17, getValue(bdv1));
        assertEquals(i, bdv2.nextDoc());
        assertEquals(i, getValue(bdv2));
    }
    reader.close();
    dir.close();
}
Also used: MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), StringField (org.apache.lucene.document.StringField), Document (org.apache.lucene.document.Document), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), Directory (org.apache.lucene.store.Directory), NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)
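
This and the other TestBinaryDocValuesUpdates examples below call toBytes(long) and getValue(BinaryDocValues), which are private helpers of the test class, not part of the Lucene API, and are not shown on this page. A minimal sketch of what they might look like, assuming a VLong-style variable-length encoding of the long inside a BytesRef (requires org.apache.lucene.index.BinaryDocValues, org.apache.lucene.util.BytesRef, java.io.IOException):

static BytesRef toBytes(long value) {
    // assumption: VLong-style encoding; a negative long can need up to 10 bytes
    BytesRef bytes = new BytesRef(10);
    while ((value & ~0x7FL) != 0L) {
        bytes.bytes[bytes.length++] = (byte) ((value & 0x7FL) | 0x80L);
        value >>>= 7;
    }
    bytes.bytes[bytes.length++] = (byte) value;
    return bytes;
}

static long getValue(BinaryDocValues bdv) throws IOException {
    // decodes the value of the document the iterator is currently positioned on
    BytesRef term = bdv.binaryValue();
    int idx = term.offset;
    byte b = term.bytes[idx++];
    long value = b & 0x7FL;
    for (int shift = 7; (b & 0x80L) != 0; shift += 7) {
        b = term.bytes[idx++];
        value |= (b & 0x7FL) << shift;
    }
    return value;
}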

Example 67 with BinaryDocValuesField

Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: the class Test2BBinaryDocValues, method testVariableBinary.

// indexes IndexWriter.MAX_DOCS docs with a variable binary field
public void testVariableBinary() throws Exception {
    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
    if (dir instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))
        .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .setRAMBufferSizeMB(256.0)
        .setMergeScheduler(new ConcurrentMergeScheduler())
        .setMergePolicy(newLogMergePolicy(false, 10))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        .setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    byte[] bytes = new byte[4];
    ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
    BytesRef data = new BytesRef(bytes);
    BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
    doc.add(dvField);
    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
        encoder.reset(bytes);
        // 1, 2, or 3 bytes
        encoder.writeVInt(i % 65535);
        data.length = encoder.getPosition();
        w.addDocument(doc);
        if (i % 100000 == 0) {
            System.out.println("indexed: " + i);
            System.out.flush();
        }
    }
    w.forceMerge(1);
    w.close();
    System.out.println("verifying...");
    System.out.flush();
    DirectoryReader r = DirectoryReader.open(dir);
    int expectedValue = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    for (LeafReaderContext context : r.leaves()) {
        LeafReader reader = context.reader();
        BinaryDocValues dv = reader.getBinaryDocValues("dv");
        for (int i = 0; i < reader.maxDoc(); i++) {
            assertEquals(i, dv.nextDoc());
            final BytesRef term = dv.binaryValue();
            input.reset(term.bytes, term.offset, term.length);
            assertEquals(expectedValue % 65535, input.readVInt());
            assertTrue(input.eof());
            expectedValue++;
        }
    }
    r.close();
    dir.close();
}
Also used: MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper), Document (org.apache.lucene.document.Document), ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), ByteArrayDataOutput (org.apache.lucene.store.ByteArrayDataOutput), BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper), BytesRef (org.apache.lucene.util.BytesRef)
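
The verification loop above walks the BinaryDocValues iterator in docID order with nextDoc(). When only a single document's value is needed, the Lucene 7 doc-values iterator can instead be positioned with advanceExact(int); a minimal sketch, with leafReader and docID assumed to be in scope inside a method that can throw IOException, and "dv" an illustrative field name:

// Random-access read of one document's binary value.
BinaryDocValues values = leafReader.getBinaryDocValues("dv");
if (values != null && values.advanceExact(docID)) {
    BytesRef value = values.binaryValue(); // only valid while positioned on docID
    // use value.bytes, value.offset, value.length before advancing again
} else {
    // the document has no value for this field (or no document in this segment has it)
}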

Example 68 with BinaryDocValuesField

Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: the class TestBinaryDocValuesUpdates, method testSegmentMerges.

public void testSegmentMerges() throws Exception {
    Directory dir = newDirectory();
    Random random = random();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
    IndexWriter writer = new IndexWriter(dir, conf);
    int docid = 0;
    int numRounds = atLeast(10);
    for (int rnd = 0; rnd < numRounds; rnd++) {
        Document doc = new Document();
        doc.add(new StringField("key", "doc", Store.NO));
        doc.add(new BinaryDocValuesField("bdv", toBytes(-1)));
        int numDocs = atLeast(30);
        for (int i = 0; i < numDocs; i++) {
            doc.removeField("id");
            doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
            writer.addDocument(doc);
        }
        long value = rnd + 1;
        writer.updateBinaryDocValue(new Term("key", "doc"), "bdv", toBytes(value));
        if (random.nextDouble() < 0.2) {
            // randomly delete some docs
            writer.deleteDocuments(new Term("id", Integer.toString(random.nextInt(docid))));
        }
        // randomly commit or reopen-IW (or nothing), before forceMerge
        if (random.nextDouble() < 0.4) {
            writer.commit();
        } else if (random.nextDouble() < 0.1) {
            writer.close();
            conf = newIndexWriterConfig(new MockAnalyzer(random));
            writer = new IndexWriter(dir, conf);
        }
        // add another document with the current value, to be sure forceMerge has
        // something to merge (for instance, it could be that CMS finished merging
        // all segments down to 1 before the delete was applied, so when
        // forceMerge is called, the index will have a single segment with deletes,
        // and some MPs might not merge it, thereby invalidating the test's
        // assumption that the reader has no deletes).
        doc = new Document();
        doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
        doc.add(new StringField("key", "doc", Store.NO));
        doc.add(new BinaryDocValuesField("bdv", toBytes(value)));
        writer.addDocument(doc);
        writer.forceMerge(1, true);
        final DirectoryReader reader;
        if (random.nextBoolean()) {
            writer.commit();
            reader = DirectoryReader.open(dir);
        } else {
            reader = DirectoryReader.open(writer);
        }
        assertEquals(1, reader.leaves().size());
        final LeafReader r = reader.leaves().get(0).reader();
        assertNull("index should have no deletes after forceMerge", r.getLiveDocs());
        BinaryDocValues bdv = r.getBinaryDocValues("bdv");
        assertNotNull(bdv);
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, bdv.nextDoc());
            assertEquals(value, getValue(bdv));
        }
        reader.close();
    }
    writer.close();
    dir.close();
}
Also used: Document (org.apache.lucene.document.Document), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), Random (java.util.Random), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), StringField (org.apache.lucene.document.StringField), Directory (org.apache.lucene.store.Directory), NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)

Example 69 with BinaryDocValuesField

Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: the class TestBinaryDocValuesUpdates, method testTonsOfUpdates.

@Nightly
public void testTonsOfUpdates() throws Exception {
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = newDirectory();
    final Random random = random();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    // don't flush by doc
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, conf);
    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    final int numDocs = atLeast(20000);
    final int numBinaryFields = atLeast(5);
    // terms should affect many docs
    final int numTerms = TestUtil.nextInt(random, 10, 100);
    Set<String> updateTerms = new HashSet<>();
    while (updateTerms.size() < numTerms) {
        updateTerms.add(TestUtil.randomSimpleString(random));
    }
    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++) {
            doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++) {
            long val = random.nextInt();
            doc.add(new BinaryDocValuesField("f" + j, toBytes(val)));
            doc.add(new BinaryDocValuesField("cf" + j, toBytes(val * 2)));
        }
        writer.addDocument(doc);
    }
    // commit so there's something to apply to
    writer.commit();
    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
    final int numUpdates = atLeast(100);
    //    System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++) {
        int field = random.nextInt(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
        long value = random.nextInt();
        writer.updateDocValues(updateTerm, new BinaryDocValuesField("f" + field, toBytes(value)), new BinaryDocValuesField("cf" + field, toBytes(value * 2)));
    }
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    for (LeafReaderContext context : reader.leaves()) {
        for (int i = 0; i < numBinaryFields; i++) {
            LeafReader r = context.reader();
            BinaryDocValues f = r.getBinaryDocValues("f" + i);
            BinaryDocValues cf = r.getBinaryDocValues("cf" + i);
            for (int j = 0; j < r.maxDoc(); j++) {
                assertEquals(j, f.nextDoc());
                assertEquals(j, cf.nextDoc());
                assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, getValue(cf), getValue(f) * 2);
            }
        }
    }
    reader.close();
    dir.close();
}
Also used: Document (org.apache.lucene.document.Document), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), Random (java.util.Random), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), StringField (org.apache.lucene.document.StringField), Directory (org.apache.lucene.store.Directory), NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory), HashSet (java.util.HashSet)
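
Note that the test above and the one below call IndexWriter.updateDocValues rather than updateBinaryDocValue: it applies updates to several doc-values fields of every document matching the term as a single atomic operation. A sketch of the call shape, with illustrative field names, writer assumed to be an open IndexWriter, and the toBytes helper sketched earlier:

// Atomically rewrite two binary doc-values fields for every document that matches the term.
writer.updateDocValues(new Term("upd", "someKey"),
    new BinaryDocValuesField("f0", toBytes(42L)),
    new BinaryDocValuesField("cf0", toBytes(84L)));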

Example 70 with BinaryDocValuesField

Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: the class TestBinaryDocValuesUpdates, method testUpdateDifferentDocsInDifferentGens.

public void testUpdateDifferentDocsInDifferentGens() throws Exception {
    // update a different (randomly chosen) document in each generation
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMaxBufferedDocs(4);
    IndexWriter writer = new IndexWriter(dir, conf);
    final int numDocs = atLeast(10);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", "doc" + i, Store.NO));
        long value = random().nextInt();
        doc.add(new BinaryDocValuesField("f", toBytes(value)));
        doc.add(new BinaryDocValuesField("cf", toBytes(value * 2)));
        writer.addDocument(doc);
    }
    int numGens = atLeast(5);
    for (int i = 0; i < numGens; i++) {
        int doc = random().nextInt(numDocs);
        Term t = new Term("id", "doc" + doc);
        long value = random().nextLong();
        writer.updateDocValues(t, new BinaryDocValuesField("f", toBytes(value)), new BinaryDocValuesField("cf", toBytes(value * 2)));
        DirectoryReader reader = DirectoryReader.open(writer);
        for (LeafReaderContext context : reader.leaves()) {
            LeafReader r = context.reader();
            BinaryDocValues fbdv = r.getBinaryDocValues("f");
            BinaryDocValues cfbdv = r.getBinaryDocValues("cf");
            for (int j = 0; j < r.maxDoc(); j++) {
                assertEquals(j, fbdv.nextDoc());
                assertEquals(j, cfbdv.nextDoc());
                assertEquals(getValue(cfbdv), getValue(fbdv) * 2);
            }
        }
        reader.close();
    }
    writer.close();
    dir.close();
}
Also used: Document (org.apache.lucene.document.Document), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), StringField (org.apache.lucene.document.StringField), Directory (org.apache.lucene.store.Directory), NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)

Aggregations

BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 90
Document (org.apache.lucene.document.Document): 84
Directory (org.apache.lucene.store.Directory): 71
BytesRef (org.apache.lucene.util.BytesRef): 65
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 57
StringField (org.apache.lucene.document.StringField): 50
NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 40
SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 29
SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField): 24
SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 23
NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory): 21
Field (org.apache.lucene.document.Field): 16
Analyzer (org.apache.lucene.analysis.Analyzer): 15
Random (java.util.Random): 12
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 12
StoredField (org.apache.lucene.document.StoredField): 11
TextField (org.apache.lucene.document.TextField): 11
IOException (java.io.IOException): 9
BinaryDocValues (org.apache.lucene.index.BinaryDocValues): 9
LeafReader (org.apache.lucene.index.LeafReader): 9
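
Distilled from the examples above, a minimal standalone sketch of the pattern they share: index a BinaryDocValuesField next to an indexed key field, rewrite its value in place with IndexWriter.updateBinaryDocValue, and read it back. The directory path, analyzer, class name, and field names are illustrative, not taken from the tests.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class BinaryDocValuesUpdateSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/bdv-demo"))) { // illustrative path
            IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer())
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            try (IndexWriter writer = new IndexWriter(dir, conf)) {
                Document doc = new Document();
                doc.add(new StringField("key", "doc", Store.NO));                 // indexed key used as the update term
                doc.add(new BinaryDocValuesField("payload", new BytesRef("v1"))); // initial binary value
                writer.addDocument(doc);
                writer.commit();
                // rewrite the doc-values field of every document matching the term, without re-indexing
                writer.updateBinaryDocValue(new Term("key", "doc"), "payload", new BytesRef("v2"));
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                LeafReader leaf = reader.leaves().get(0).reader();
                BinaryDocValues dv = leaf.getBinaryDocValues("payload");
                if (dv != null && dv.advanceExact(0)) {
                    System.out.println(dv.binaryValue().utf8ToString()); // prints "v2"
                }
            }
        }
    }
}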