Search in sources :

Example 96 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestMultiDocValues method testSortedSet.

/**
 * Indexes random multi-valued "bytes" sorted-set doc values, then checks that the
 * view produced by {@code MultiDocValues.getSortedSetValues} over the reader opened
 * before {@code forceMerge(1)} matches the doc values of the single leaf obtained
 * after the merge: same value count, same term per ordinal, and the same ordinal
 * sequence for every document.
 */
public void testSortedSet() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(random(), null);
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    // Index documents carrying 0..4 random values each, committing now and then.
    for (int docsLeft = atLeast(TEST_NIGHTLY ? 500 : 50); docsLeft > 0; docsLeft--) {
        Document document = new Document();
        for (int valuesLeft = random().nextInt(5); valuesLeft > 0; valuesLeft--) {
            String term = TestUtil.randomUnicodeString(random());
            document.add(new SortedSetDocValuesField("bytes", new BytesRef(term)));
        }
        writer.addDocument(document);
        boolean commitNow = random().nextInt(17) == 0;
        if (commitNow) {
            writer.commit();
        }
    }

    // One reader from before the forced merge, one from after it.
    DirectoryReader ir = writer.getReader();
    writer.forceMerge(1);
    DirectoryReader ir2 = writer.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    writer.close();

    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
    if (multi != null) {
        final long valueCount = single.getValueCount();
        assertEquals(valueCount, multi.getValueCount());

        // Every ordinal must resolve to the same term in both views.
        long termOrd = 0;
        while (termOrd < valueCount) {
            final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(termOrd));
            assertEquals(expected, multi.lookupOrd(termOrd));
            termOrd++;
        }

        // Both iterators must visit the same docs with the same ordinal lists.
        int docID;
        do {
            docID = single.nextDoc();
            assertEquals(docID, multi.nextDoc());
            if (docID != NO_MORE_DOCS) {
                ArrayList<Long> singleOrds = new ArrayList<>();
                for (long o = single.nextOrd(); o != SortedSetDocValues.NO_MORE_ORDS; o = single.nextOrd()) {
                    singleOrds.add(o);
                }
                int matched = 0;
                for (long o = multi.nextOrd(); o != SortedSetDocValues.NO_MORE_ORDS; o = multi.nextOrd()) {
                    assertEquals(singleOrds.get(matched).longValue(), o);
                    matched++;
                }
                assertEquals(singleOrds.size(), matched);
            }
        } while (docID != NO_MORE_DOCS);
    } else {
        assertNull(single);
    }

    // Fresh iterators for the advance-based comparisons.
    testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
    testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());

    ir.close();
    ir2.close();
    directory.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 97 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestMemoryIndex method testDocValues.

/**
 * Builds a one-document {@code MemoryIndex} holding every doc-values flavour and
 * asserts what the leaf reader returns for each field: sorted-numeric values come
 * back in ascending order with duplicates kept (5 values), while the sorted-set
 * field reports 3 distinct terms with ordinals 0..2 resolving to "c", "d", "f".
 */
public void testDocValues() throws Exception {
    Document document = new Document();
    document.add(new NumericDocValuesField("numeric", 29L));
    // Added out of order and with a duplicate on purpose.
    for (long value : new long[] { 33L, 32L, 32L, 31L, 30L }) {
        document.add(new SortedNumericDocValuesField("sorted_numeric", value));
    }
    document.add(new BinaryDocValuesField("binary", new BytesRef("a")));
    document.add(new SortedDocValuesField("sorted", new BytesRef("b")));
    // Added out of order and with a duplicate on purpose.
    for (String term : new String[] { "f", "d", "d", "c" }) {
        document.add(new SortedSetDocValuesField("sorted_set", new BytesRef(term)));
    }

    MemoryIndex index = MemoryIndex.fromDocument(document, analyzer);
    LeafReader reader = index.createSearcher().getIndexReader().leaves().get(0).reader();

    // numeric
    NumericDocValues numeric = reader.getNumericDocValues("numeric");
    assertEquals(0, numeric.nextDoc());
    assertEquals(29L, numeric.longValue());

    // sorted numeric
    SortedNumericDocValues sortedNumeric = reader.getSortedNumericDocValues("sorted_numeric");
    assertEquals(0, sortedNumeric.nextDoc());
    assertEquals(5, sortedNumeric.docValueCount());
    for (long expected : new long[] { 30L, 31L, 32L, 32L, 33L }) {
        assertEquals(expected, sortedNumeric.nextValue());
    }

    // binary
    BinaryDocValues binary = reader.getBinaryDocValues("binary");
    assertEquals(0, binary.nextDoc());
    assertEquals("a", binary.binaryValue().utf8ToString());

    // sorted
    SortedDocValues sorted = reader.getSortedDocValues("sorted");
    assertEquals(0, sorted.nextDoc());
    assertEquals("b", sorted.binaryValue().utf8ToString());
    assertEquals(0, sorted.ordValue());
    assertEquals("b", sorted.lookupOrd(0).utf8ToString());

    // sorted set
    final String[] expectedTerms = { "c", "d", "f" };
    SortedSetDocValues sortedSet = reader.getSortedSetDocValues("sorted_set");
    assertEquals(3, sortedSet.getValueCount());
    assertEquals(0, sortedSet.nextDoc());
    for (long expectedOrd = 0L; expectedOrd <= 2L; expectedOrd++) {
        assertEquals(expectedOrd, sortedSet.nextOrd());
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    for (int ord = 0; ord < expectedTerms.length; ord++) {
        assertEquals(expectedTerms[ord], sortedSet.lookupOrd(ord).utf8ToString());
    }
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 98 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestIndexWriterExceptions2 method testBasics.

/**
 * Just one thread, serial merge policy, hopefully debuggable.
 *
 * <p>Indexes {@code atLeast(500)} documents through an {@code IndexWriter} configured
 * with an analyzer wrapped in {@code CrankyTokenFilter} and a {@code CrankyCodec}.
 * Any exception whose message starts with "Fake IOException" is treated as expected
 * and recorded in an in-memory log; anything else is rethrown. After a fake exception
 * the writer is allowed to report {@code AlreadyClosedException} once, at which point
 * a fresh writer is opened on the same directory. The log is dumped to
 * {@code System.out} if the test fails, or when {@code VERBOSE} is set.
 */
public void testBasics() throws Exception {
    // disable slow things: we don't rely upon sleeps here.
    Directory dir = newDirectory();
    if (dir instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
        ((MockDirectoryWrapper) dir).setUseSlowOpenClosers(false);
    }
    // log all exceptions we hit, in case we fail (for debugging)
    ByteArrayOutputStream exceptionLog = new ByteArrayOutputStream();
    PrintStream exceptionStream = new PrintStream(exceptionLog, true, "UTF-8");
    //PrintStream exceptionStream = System.out;
    // create lots of non-aborting exceptions with a broken analyzer
    // (the seed is captured once so every createComponents call builds its Randoms from the same value)
    final long analyzerSeed = random().nextLong();
    Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, false);
            // TODO: can we turn this on? our filter is probably too evil
            tokenizer.setEnableChecks(false);
            TokenStream stream = tokenizer;
            // emit some payloads
            if (fieldName.contains("payloads")) {
                stream = new MockVariableLengthPayloadFilter(new Random(analyzerSeed), stream);
            }
            // the cranky filter is always the outermost stage of the chain
            stream = new CrankyTokenFilter(stream, new Random(analyzerSeed));
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    // create lots of aborting exceptions with a broken codec
    // we don't need a random codec, as we aren't trying to find bugs in the codec here.
    Codec inner = RANDOM_MULTIPLIER > 1 ? Codec.getDefault() : new AssertingCodec();
    Codec codec = new CrankyCodec(inner, new Random(random().nextLong()));
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    // just for now, try to keep this test reproducible
    conf.setMergeScheduler(new SerialMergeScheduler());
    conf.setCodec(codec);
    int numDocs = atLeast(500);
    IndexWriter iw = new IndexWriter(dir, conf);
    try {
        // set to true after a fake exception; an AlreadyClosedException is only accepted while it is true
        boolean allowAlreadyClosed = false;
        for (int i = 0; i < numDocs; i++) {
            // TODO: add crankyDocValuesFields, etc
            Document doc = new Document();
            doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
            doc.add(new NumericDocValuesField("dv", i));
            doc.add(new BinaryDocValuesField("dv2", new BytesRef(Integer.toString(i))));
            doc.add(new SortedDocValuesField("dv3", new BytesRef(Integer.toString(i))));
            doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i))));
            doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i - 1))));
            doc.add(new SortedNumericDocValuesField("dv5", i));
            doc.add(new SortedNumericDocValuesField("dv5", i - 1));
            doc.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
            // ensure we store something
            doc.add(new StoredField("stored1", "foo"));
            doc.add(new StoredField("stored1", "bar"));
            // ensure we get some payloads
            doc.add(newTextField("text_payloads", TestUtil.randomAnalysisString(random(), 6, true), Field.Store.NO));
            // ensure we get some vectors
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.setStoreTermVectors(true);
            doc.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
            doc.add(new IntPoint("point", random().nextInt()));
            doc.add(new IntPoint("point2d", random().nextInt(), random().nextInt()));
            // 9 times out of 10 add the document on its own; otherwise add it as part of a two-document block
            if (random().nextInt(10) > 0) {
                // single doc
                try {
                    iw.addDocument(doc);
                    // we made it, sometimes delete our doc, or update a dv
                    // (thingToDo == 3 leaves the document untouched)
                    int thingToDo = random().nextInt(4);
                    if (thingToDo == 0) {
                        iw.deleteDocuments(new Term("id", Integer.toString(i)));
                    } else if (thingToDo == 1) {
                        iw.updateNumericDocValue(new Term("id", Integer.toString(i)), "dv", i + 1L);
                    } else if (thingToDo == 2) {
                        iw.updateBinaryDocValue(new Term("id", Integer.toString(i)), "dv2", new BytesRef(Integer.toString(i + 1)));
                    }
                } catch (AlreadyClosedException ace) {
                    // OK: writer was closed by abort; we just reopen now:
                    assertTrue(iw.deleter.isClosed());
                    // only legal if a fake exception was seen since the last reopen
                    assertTrue(allowAlreadyClosed);
                    allowAlreadyClosed = false;
                    conf = newIndexWriterConfig(analyzer);
                    // just for now, try to keep this test reproducible
                    conf.setMergeScheduler(new SerialMergeScheduler());
                    conf.setCodec(codec);
                    iw = new IndexWriter(dir, conf);
                } catch (Exception e) {
                    // injected failures are logged and tolerated; everything else fails the test
                    if (e.getMessage() != null && e.getMessage().startsWith("Fake IOException")) {
                        exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
                        e.printStackTrace(exceptionStream);
                        allowAlreadyClosed = true;
                    } else {
                        Rethrow.rethrow(e);
                    }
                }
            } else {
                // block docs
                // second document of the block, identified by the negated id
                Document doc2 = new Document();
                doc2.add(newStringField("id", Integer.toString(-i), Field.Store.NO));
                doc2.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
                doc2.add(new StoredField("stored1", "foo"));
                doc2.add(new StoredField("stored1", "bar"));
                doc2.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
                try {
                    iw.addDocuments(Arrays.asList(doc, doc2));
                    // we made it, sometimes delete our docs
                    if (random().nextBoolean()) {
                        iw.deleteDocuments(new Term("id", Integer.toString(i)), new Term("id", Integer.toString(-i)));
                    }
                } catch (AlreadyClosedException ace) {
                    // OK: writer was closed by abort; we just reopen now:
                    assertTrue(iw.deleter.isClosed());
                    // only legal if a fake exception was seen since the last reopen
                    assertTrue(allowAlreadyClosed);
                    allowAlreadyClosed = false;
                    conf = newIndexWriterConfig(analyzer);
                    // just for now, try to keep this test reproducible
                    conf.setMergeScheduler(new SerialMergeScheduler());
                    conf.setCodec(codec);
                    iw = new IndexWriter(dir, conf);
                } catch (Exception e) {
                    // injected failures are logged and tolerated; everything else fails the test
                    if (e.getMessage() != null && e.getMessage().startsWith("Fake IOException")) {
                        exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
                        e.printStackTrace(exceptionStream);
                        allowAlreadyClosed = true;
                    } else {
                        Rethrow.rethrow(e);
                    }
                }
            }
            // roughly once every 10 iterations, open a reader on the writer or commit, then check the index
            if (random().nextInt(10) == 0) {
                // trigger flush:
                try {
                    if (random().nextBoolean()) {
                        DirectoryReader ir = null;
                        try {
                            ir = DirectoryReader.open(iw, random().nextBoolean(), false);
                            TestUtil.checkReader(ir);
                        } finally {
                            IOUtils.closeWhileHandlingException(ir);
                        }
                    } else {
                        iw.commit();
                    }
                    // checkIndex is only attempted once an index exists in the directory
                    if (DirectoryReader.indexExists(dir)) {
                        TestUtil.checkIndex(dir);
                    }
                } catch (AlreadyClosedException ace) {
                    // OK: writer was closed by abort; we just reopen now:
                    assertTrue(iw.deleter.isClosed());
                    // only legal if a fake exception was seen since the last reopen
                    assertTrue(allowAlreadyClosed);
                    allowAlreadyClosed = false;
                    conf = newIndexWriterConfig(analyzer);
                    // just for now, try to keep this test reproducible
                    conf.setMergeScheduler(new SerialMergeScheduler());
                    conf.setCodec(codec);
                    iw = new IndexWriter(dir, conf);
                } catch (Exception e) {
                    // injected failures are logged and tolerated; everything else fails the test
                    if (e.getMessage() != null && e.getMessage().startsWith("Fake IOException")) {
                        exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
                        e.printStackTrace(exceptionStream);
                        allowAlreadyClosed = true;
                    } else {
                        Rethrow.rethrow(e);
                    }
                }
            }
        }
        // final close: a fake exception here is logged and followed by a rollback attempt
        try {
            iw.close();
        } catch (Exception e) {
            if (e.getMessage() != null && e.getMessage().startsWith("Fake IOException")) {
                exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
                e.printStackTrace(exceptionStream);
                try {
                    iw.rollback();
                } catch (Throwable t) {
                    // deliberately ignored: the rollback is best-effort after a failed close
                }
            } else {
                Rethrow.rethrow(e);
            }
        }
        dir.close();
    } catch (Throwable t) {
        // any failure that escaped the handlers above: dump the recorded fake exceptions, then rethrow
        System.out.println("Unexpected exception: dumping fake-exception-log:...");
        exceptionStream.flush();
        System.out.println(exceptionLog.toString("UTF-8"));
        System.out.flush();
        Rethrow.rethrow(t);
    }
    if (VERBOSE) {
        System.out.println("TEST PASSED: dumping fake-exception-log:...");
        System.out.println(exceptionLog.toString("UTF-8"));
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CrankyTokenFilter(org.apache.lucene.analysis.CrankyTokenFilter) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) CrankyCodec(org.apache.lucene.codecs.cranky.CrankyCodec) AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) Codec(org.apache.lucene.codecs.Codec) StoredField(org.apache.lucene.document.StoredField) Random(java.util.Random) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) CrankyCodec(org.apache.lucene.codecs.cranky.CrankyCodec) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) PrintStream(java.io.PrintStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) IntPoint(org.apache.lucene.document.IntPoint) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) MockVariableLengthPayloadFilter(org.apache.lucene.analysis.MockVariableLengthPayloadFilter)

Aggregations

SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)98 BytesRef (org.apache.lucene.util.BytesRef)96 Document (org.apache.lucene.document.Document)82 Directory (org.apache.lucene.store.Directory)74 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)38 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)36 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)33 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)27 IndexReader (org.apache.lucene.index.IndexReader)27 StringField (org.apache.lucene.document.StringField)23 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)22 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)20 ArrayList (java.util.ArrayList)18 Analyzer (org.apache.lucene.analysis.Analyzer)14 IndexableField (org.apache.lucene.index.IndexableField)13 Field (org.apache.lucene.document.Field)12 DirectoryReader (org.apache.lucene.index.DirectoryReader)11 LeafReader (org.apache.lucene.index.LeafReader)11 IntPoint (org.apache.lucene.document.IntPoint)10 StoredField (org.apache.lucene.document.StoredField)10