Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestIndexWriterExceptions, method testNoLostDeletesOrUpdates.
// Make sure that if we hit a transient IOException (e.g., disk
// full), and the exception then stops (e.g., disk frees
// up) so we can successfully close IW or open an NRT
// reader, we don't lose any deletes or updates:
public void testNoLostDeletesOrUpdates() throws Throwable {
int deleteCount = 0;
int docBase = 0;
int docCount = 0;
MockDirectoryWrapper dir = newMockDirectory();
final AtomicBoolean shouldFail = new AtomicBoolean();
dir.failOn(new MockDirectoryWrapper.Failure() {
@Override
public void eval(MockDirectoryWrapper dir) throws IOException {
if (shouldFail.get() == false) {
return;
}
// Only sometimes throw the exc, so we get it sometimes on creating
// the file, on flushing the buffer, or on closing the file:
if (random().nextInt(3) != 2) {
return;
}
StackTraceElement[] trace = Thread.currentThread().getStackTrace();
boolean sawSeal = false;
boolean sawWrite = false;
for (int i = 0; i < trace.length; i++) {
if ("sealFlushedSegment".equals(trace[i].getMethodName())) {
sawSeal = true;
break;
}
if ("writeLiveDocs".equals(trace[i].getMethodName()) || "writeFieldUpdates".equals(trace[i].getMethodName())) {
sawWrite = true;
}
}
// Don't throw the exc while we are "flushing", else
// the segment is aborted and docs are lost:
if (sawWrite && sawSeal == false) {
if (VERBOSE) {
System.out.println("TEST: now fail; thread=" + Thread.currentThread().getName() + " exc:");
new Throwable().printStackTrace(System.out);
}
shouldFail.set(false);
throw new FakeIOException();
}
}
});
RandomIndexWriter w = null;
boolean tragic = false;
for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++) {
int numDocs = atLeast(100);
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " numDocs=" + numDocs + " docBase=" + docBase + " delCount=" + deleteCount);
}
if (w == null) {
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
w = new RandomIndexWriter(random(), dir, iwc);
// Since we hit exc during merging, a partial
// forceMerge can easily return when there are still
// too many segments in the index:
w.setDoRandomForceMergeAssert(false);
}
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + (docBase + i), Field.Store.NO));
doc.add(new NumericDocValuesField("f", 1L));
doc.add(new NumericDocValuesField("cf", 2L));
doc.add(new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(1L)));
doc.add(new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(2L)));
w.addDocument(doc);
}
docCount += numDocs;
// TODO: we could make the test more evil, by letting
// it throw more than one exc, randomly, before "recovering"
// TODO: we could also install an infoStream and try
// to fail in "more evil" places inside BDS
shouldFail.set(true);
boolean doClose = false;
try {
for (int i = 0; i < numDocs; i++) {
if (random().nextInt(10) == 7) {
boolean fieldUpdate = random().nextBoolean();
int docid = docBase + i;
if (fieldUpdate) {
long value = iter;
if (VERBOSE) {
System.out.println(" update id=" + docid + " to value " + value);
}
Term idTerm = new Term("id", Integer.toString(docid));
if (random().nextBoolean()) {
// update only numeric field
w.updateDocValues(idTerm, new NumericDocValuesField("f", value), new NumericDocValuesField("cf", value * 2));
} else if (random().nextBoolean()) {
w.updateDocValues(idTerm, new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(value)), new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(value * 2)));
} else {
w.updateDocValues(idTerm, new NumericDocValuesField("f", value), new NumericDocValuesField("cf", value * 2), new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(value)), new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(value * 2)));
}
}
// sometimes do both deletes and updates
if (!fieldUpdate || random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" delete id=" + docid);
}
deleteCount++;
w.deleteDocuments(new Term("id", "" + docid));
}
}
}
// Trigger writeLiveDocs + writeFieldUpdates so we hit fake exc:
IndexReader r = w.getReader();
// Sometimes we will make it here (we only randomly
// throw the exc):
assertEquals(docCount - deleteCount, r.numDocs());
r.close();
// Sometimes close, so the disk full happens on close:
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" now close writer");
}
doClose = true;
w.commit();
w.close();
w = null;
}
} catch (Throwable t) {
// The exc may strike during a merge, in which case the writer
// throws it as a wrapped IOE, so don't fail in this case.
if (t instanceof FakeIOException || (t.getCause() instanceof FakeIOException)) {
// expected
if (VERBOSE) {
System.out.println("TEST: hit expected IOE");
}
if (t instanceof AlreadyClosedException) {
// FakeIOExc struck during merge and writer is now closed:
w = null;
tragic = true;
}
} else {
throw t;
}
}
shouldFail.set(false);
if (w != null) {
MergeScheduler ms = w.w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).sync();
}
if (w.w.getTragicException() != null) {
// Tragic exc in CMS closed the writer
w = null;
}
}
IndexReader r;
if (doClose && w != null) {
if (VERBOSE) {
System.out.println(" now 2nd close writer");
}
w.close();
w = null;
}
if (w == null || random().nextBoolean()) {
// disk" bits are good:
if (VERBOSE) {
System.out.println("TEST: verify against non-NRT reader");
}
if (w != null) {
w.commit();
}
r = DirectoryReader.open(dir);
} else {
if (VERBOSE) {
System.out.println("TEST: verify against NRT reader");
}
r = w.getReader();
}
if (tragic == false) {
assertEquals(docCount - deleteCount, r.numDocs());
}
BytesRef scratch = new BytesRef();
for (LeafReaderContext context : r.leaves()) {
LeafReader reader = context.reader();
Bits liveDocs = reader.getLiveDocs();
NumericDocValues f = reader.getNumericDocValues("f");
NumericDocValues cf = reader.getNumericDocValues("cf");
BinaryDocValues bf = reader.getBinaryDocValues("bf");
BinaryDocValues bcf = reader.getBinaryDocValues("bcf");
for (int i = 0; i < reader.maxDoc(); i++) {
if (liveDocs == null || liveDocs.get(i)) {
assertEquals(i, f.advance(i));
assertEquals(i, cf.advance(i));
assertEquals(i, bf.advance(i));
assertEquals(i, bcf.advance(i));
assertEquals("doc=" + (docBase + i), cf.longValue(), f.longValue() * 2);
assertEquals("doc=" + (docBase + i), TestBinaryDocValuesUpdates.getValue(bcf), TestBinaryDocValuesUpdates.getValue(bf) * 2);
}
}
}
r.close();
// Sometimes re-use RIW, other times open new one:
if (w != null && random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: close writer");
}
w.close();
w = null;
}
docBase += numDocs;
}
if (w != null) {
w.close();
}
// Final verify:
if (tragic == false) {
IndexReader r = DirectoryReader.open(dir);
assertEquals(docCount - deleteCount, r.numDocs());
r.close();
}
dir.close();
}
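Note: the tests in this section round-trip long values through the TestBinaryDocValuesUpdates.toBytes and getValue helpers, which live in that test class. A minimal sketch of an equivalent pair, assuming a simple fixed-width big-endian encoding (the real helpers may use a different, variable-length scheme):

// Hypothetical stand-ins for TestBinaryDocValuesUpdates.toBytes/getValue.
// Uses org.apache.lucene.util.BytesRef and org.apache.lucene.index.BinaryDocValues.
static BytesRef toBytes(long value) {
  byte[] bytes = new byte[8];
  for (int i = 7; i >= 0; i--) {   // big-endian: most significant byte first
    bytes[i] = (byte) value;
    value >>>= 8;
  }
  return new BytesRef(bytes);
}

static long getValue(BinaryDocValues bdv) throws IOException {
  BytesRef term = bdv.binaryValue();  // the iterator must already be positioned on a doc
  long value = 0;
  for (int i = 0; i < 8; i++) {
    value = (value << 8) | (term.bytes[term.offset + i] & 0xFFL);
  }
  return value;
}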
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestPerFieldDocValuesFormat, method testTwoFieldsTwoFormats.
// just a simple trivial test
// TODO: we should come up with a test that somehow checks that segment suffix
// is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
final DocValuesFormat slow = DocValuesFormat.forName("Memory");
iwc.setCodec(new AssertingCodec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if ("dv1".equals(field)) {
return fast;
} else {
return slow;
}
}
});
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
// read-only=true
IndexReader ireader = DirectoryReader.open(directory);
IndexSearcher isearcher = newSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int hitDocID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(hitDocID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(hitDocID, dv.advance(hitDocID));
assertEquals(5, dv.longValue());
BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
assertEquals(hitDocID, dv2.advance(hitDocID));
final BytesRef term = dv2.binaryValue();
assertEquals(new BytesRef("hello world"), term);
}
ireader.close();
directory.close();
}
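A detail worth calling out: since Lucene 7 the doc-values classes are iterators, so advance(docID) (or nextDoc()) must position the iterator before longValue()/binaryValue() may be called, which is why the loop above advances dv and dv2 first. A hedged convenience wrapper around that contract (readBinaryDV is not a Lucene API, just an illustration):

// Hypothetical helper: fetch the binary doc value for one document, or null
// if the segment lacks the field or the document has no value.
static BytesRef readBinaryDV(LeafReader reader, String field, int docID) throws IOException {
  BinaryDocValues dv = reader.getBinaryDocValues(field);
  if (dv == null || dv.advance(docID) != docID) {
    return null;
  }
  return dv.binaryValue();
}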
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class Test2BBinaryDocValues, method testFixedBinary.
// indexes IndexWriter.MAX_DOCS docs with a fixed binary field
public void testFixedBinary() throws Exception {
BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BFixedBinary"));
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
}
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy(false, 10)).setOpenMode(IndexWriterConfig.OpenMode.CREATE).setCodec(TestUtil.getDefaultCodec()));
Document doc = new Document();
byte[] bytes = new byte[4];
BytesRef data = new BytesRef(bytes);
BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
doc.add(dvField);
for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
bytes[0] = (byte) (i >> 24);
bytes[1] = (byte) (i >> 16);
bytes[2] = (byte) (i >> 8);
bytes[3] = (byte) i;
w.addDocument(doc);
if (i % 100000 == 0) {
System.out.println("indexed: " + i);
System.out.flush();
}
}
w.forceMerge(1);
w.close();
System.out.println("verifying...");
System.out.flush();
DirectoryReader r = DirectoryReader.open(dir);
int expectedValue = 0;
for (LeafReaderContext context : r.leaves()) {
LeafReader reader = context.reader();
BinaryDocValues dv = reader.getBinaryDocValues("dv");
for (int i = 0; i < reader.maxDoc(); i++) {
bytes[0] = (byte) (expectedValue >> 24);
bytes[1] = (byte) (expectedValue >> 16);
bytes[2] = (byte) (expectedValue >> 8);
bytes[3] = (byte) expectedValue;
assertEquals(i, dv.nextDoc());
final BytesRef term = dv.binaryValue();
assertEquals(data, term);
expectedValue++;
}
}
r.close();
dir.close();
}
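Two details make this test work: dvField holds a reference to the shared bytes array, so mutating it before each addDocument stores a distinct value per doc, and the shift-and-mask lines are plain big-endian int packing. An equivalent packing via java.nio.ByteBuffer (a sketch, not what the test itself uses):

// Sketch: big-endian packing of an int, equivalent to the manual shifts above.
// ByteBuffer's default byte order is big-endian.
static void packBigEndian(int value, byte[] dest) {
  java.nio.ByteBuffer.wrap(dest).putInt(value);
}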
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestMixedDocValuesUpdates, method testUpdateDifferentDocsInDifferentGens.
public void testUpdateDifferentDocsInDifferentGens() throws Exception {
// update different documents in different gens
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMaxBufferedDocs(4);
IndexWriter writer = new IndexWriter(dir, conf);
final int numDocs = atLeast(10);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "doc" + i, Store.NO));
long value = random().nextInt();
doc.add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)));
doc.add(new NumericDocValuesField("cf", value * 2));
writer.addDocument(doc);
}
int numGens = atLeast(5);
for (int i = 0; i < numGens; i++) {
int doc = random().nextInt(numDocs);
Term t = new Term("id", "doc" + doc);
long value = random().nextLong();
writer.updateDocValues(t, new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)), new NumericDocValuesField("cf", value * 2));
DirectoryReader reader = DirectoryReader.open(writer);
for (LeafReaderContext context : reader.leaves()) {
LeafReader r = context.reader();
BinaryDocValues fbdv = r.getBinaryDocValues("f");
NumericDocValues cfndv = r.getNumericDocValues("cf");
for (int j = 0; j < r.maxDoc(); j++) {
assertEquals(j, cfndv.nextDoc());
assertEquals(j, fbdv.nextDoc());
assertEquals(cfndv.longValue(), TestBinaryDocValuesUpdates.getValue(fbdv) * 2);
}
}
reader.close();
}
writer.close();
dir.close();
}
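The key API here is IndexWriter.updateDocValues, which rewrites the doc-values entries of every document matching the term without reindexing the documents themselves; passing the binary field and its numeric control field in one call keeps the cf == 2 * f invariant consistent for each update. A minimal hedged sketch of the same pattern (the id term is illustrative):

// Sketch: in-place update of paired doc-values fields for all docs matching a term.
// "f"/"cf" mirror the test fields; "doc7" is a hypothetical id value.
long value = 42L;
writer.updateDocValues(new Term("id", "doc7"),
    new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)),
    new NumericDocValuesField("cf", value * 2));
// The update becomes visible once a new reader is opened (NRT or after commit).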
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestMixedDocValuesUpdates, method testTonsOfUpdates.
@Nightly
public void testTonsOfUpdates() throws Exception {
// LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
Directory dir = newDirectory();
final Random random = random();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
// don't flush by doc
conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
IndexWriter writer = new IndexWriter(dir, conf);
// test data: lots of documents (tens of thousands) and many update terms
final int numDocs = atLeast(20000);
final int numBinaryFields = atLeast(5);
// terms should affect many docs
final int numTerms = TestUtil.nextInt(random, 10, 100);
Set<String> updateTerms = new HashSet<>();
while (updateTerms.size() < numTerms) {
updateTerms.add(TestUtil.randomSimpleString(random));
}
// build a large index with many BDV fields and update terms
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
for (int j = 0; j < numUpdateTerms; j++) {
doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
}
for (int j = 0; j < numBinaryFields; j++) {
long val = random.nextInt();
doc.add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.toBytes(val)));
doc.add(new NumericDocValuesField("cf" + j, val * 2));
}
writer.addDocument(doc);
}
// commit so there's something to apply to
writer.commit();
// set to flush every 2048 bytes (approximately every 12 updates), so we get
// many flushes during binary updates
writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
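// (setRAMBufferSizeMB takes megabytes: 2048.0 / 1024 / 1024 MB is exactly
// 2048 bytes, so at ~12 updates per flush each buffered update accounts
// for roughly 170 bytes.)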
final int numUpdates = atLeast(100);
// System.out.println("numUpdates=" + numUpdates);
for (int i = 0; i < numUpdates; i++) {
int field = random.nextInt(numBinaryFields);
Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
long value = random.nextInt();
writer.updateDocValues(updateTerm, new BinaryDocValuesField("f" + field, TestBinaryDocValuesUpdates.toBytes(value)), new NumericDocValuesField("cf" + field, value * 2));
}
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
for (LeafReaderContext context : reader.leaves()) {
for (int i = 0; i < numBinaryFields; i++) {
LeafReader r = context.reader();
BinaryDocValues f = r.getBinaryDocValues("f" + i);
NumericDocValues cf = r.getNumericDocValues("cf" + i);
for (int j = 0; j < r.maxDoc(); j++) {
assertEquals(j, cf.nextDoc());
assertEquals(j, f.nextDoc());
assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, cf.longValue(), TestBinaryDocValuesUpdates.getValue(f) * 2);
}
}
}
reader.close();
dir.close();
}