Search in sources :

Example 21 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class LegacyDocValuesIterables method binaryIterable.

/**
 * Converts values from {@link BinaryDocValues} into {@code Iterable<BytesRef>}.
 *
 * <p>Every call to {@code iterator()} obtains a fresh {@link BinaryDocValues} from
 * {@code valuesProducer} and yields exactly {@code maxDoc} elements, one per document ID
 * from {@code 0} to {@code maxDoc - 1}. The element is the document's binary value, or
 * {@code null} when the doc values iterator is not positioned on that document.
 *
 * <p>Any {@link IOException} raised while reading is rethrown wrapped in a
 * {@link RuntimeException}. Calling {@code next()} after {@code hasNext()} has returned
 * {@code false} throws {@link java.util.NoSuchElementException}, as required by the
 * {@link Iterator} contract.
 *
 * @param field the field whose binary doc values are read
 * @param valuesProducer the producer the doc values are pulled from
 * @param maxDoc the number of documents to iterate over
 * @return an iterable view with one (possibly {@code null}) element per document
 * @deprecated Consume {@link BinaryDocValues} instead.
 */
@Deprecated
public static Iterable<BytesRef> binaryIterable(final FieldInfo field, final DocValuesProducer valuesProducer, final int maxDoc) {
    return new Iterable<BytesRef>() {

        @Override
        public Iterator<BytesRef> iterator() {
            final BinaryDocValues values;
            try {
                values = valuesProducer.getBinary(field);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
            return new Iterator<BytesRef>() {

                // Last document ID handed out by next(); -1 before the first call.
                private int docIDUpto = -1;

                @Override
                public boolean hasNext() {
                    return docIDUpto + 1 < maxDoc;
                }

                @Override
                public BytesRef next() {
                    // Honor the Iterator contract instead of silently returning null past the end,
                    // which would be indistinguishable from "document has no value".
                    if (!hasNext()) {
                        throw new java.util.NoSuchElementException("no more documents: maxDoc=" + maxDoc);
                    }
                    docIDUpto++;
                    try {
                        // Only move the underlying iterator when it is behind the requested document.
                        if (docIDUpto > values.docID()) {
                            values.nextDoc();
                        }
                        // If the iterator is not on this document, it is reported as null.
                        return docIDUpto == values.docID() ? values.binaryValue() : null;
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
            };
        }
    };
}
Also used : Iterator(java.util.Iterator) IOException(java.io.IOException) BytesRef(org.apache.lucene.util.BytesRef) BinaryDocValues(org.apache.lucene.index.BinaryDocValues)

Example 22 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestPerFieldDocValuesFormat method testTwoFieldsTwoFormats.

// Minimal smoke test: a single document whose two doc-values fields are routed
// to two different DocValuesFormat instances.
// TODO: we should come up with a test that somehow checks that segment suffix
// is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
    final Analyzer mockAnalyzer = new MockAnalyzer(random());
    final Directory dir = newDirectory();
    // A plain IndexWriter is used on purpose: RandomIndexWriter might add more
    // docvalues than this test expects.
    final IndexWriterConfig config = newIndexWriterConfig(mockAnalyzer);
    final DocValuesFormat defaultFormat = TestUtil.getDefaultDocValuesFormat();
    final DocValuesFormat memoryFormat = DocValuesFormat.forName("Memory");
    config.setCodec(new AssertingCodec() {

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            // "dv1" goes to the default format, every other field to "Memory".
            return "dv1".equals(field) ? defaultFormat : memoryFormat;
        }
    });
    final IndexWriter writer = new IndexWriter(dir, config);

    final String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    final String text = "This is the text to be indexed. " + longTerm;
    final Document document = new Document();
    document.add(newTextField("fieldname", text, Field.Store.YES));
    document.add(new NumericDocValuesField("dv1", 5));
    document.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
    writer.addDocument(document);
    writer.close();

    // Reopen the index and search it.
    final IndexReader reader = DirectoryReader.open(dir);
    final IndexSearcher searcher = newSearcher(reader);
    assertEquals(1, searcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
    final TopDocs hits = searcher.search(new TermQuery(new Term("fieldname", "text")), 1);
    assertEquals(1, hits.totalHits);

    // Check the stored field and both doc-values fields for every hit.
    for (int hit = 0; hit < hits.scoreDocs.length; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        assertEquals(text, searcher.doc(docId).get("fieldname"));
        assert reader.leaves().size() == 1;

        final NumericDocValues numeric = reader.leaves().get(0).reader().getNumericDocValues("dv1");
        assertEquals(docId, numeric.advance(docId));
        assertEquals(5, numeric.longValue());

        final BinaryDocValues binary = reader.leaves().get(0).reader().getBinaryDocValues("dv2");
        assertEquals(docId, binary.advance(docId));
        assertEquals(new BytesRef("hello world"), binary.binaryValue());
    }
    reader.close();
    dir.close();
}
Also used : AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 23 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestMemoryIndex method testIndexingPointsAndDocValues.

// Indexes one field that carries both a binary point and BINARY doc values into a
// MemoryIndex, then checks that both views report the same "term" bytes.
public void testIndexingPointsAndDocValues() throws Exception {
    final byte[] termBytes = "term".getBytes(StandardCharsets.UTF_8);

    final FieldType pointAndDocValuesType = new FieldType();
    pointAndDocValuesType.setDimensions(1, 4);
    pointAndDocValuesType.setDocValuesType(DocValuesType.BINARY);
    pointAndDocValuesType.freeze();

    final Document document = new Document();
    document.add(new BinaryPoint("field", termBytes, pointAndDocValuesType));

    final MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);
    final LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();

    // Point side: a single value, which is therefore both the minimum and the maximum.
    assertEquals(1, reader.getPointValues("field").size());
    assertArrayEquals(termBytes, reader.getPointValues("field").getMinPackedValue());
    assertArrayEquals(termBytes, reader.getPointValues("field").getMaxPackedValue());

    // Doc-values side: document 0 holds the same bytes.
    final BinaryDocValues binaryValues = reader.getBinaryDocValues("field");
    assertEquals(0, binaryValues.nextDoc());
    assertEquals("term", binaryValues.binaryValue().utf8ToString());
}
Also used : BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) FieldType(org.apache.lucene.document.FieldType)

Example 24 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestMemoryIndex method testDocValues.

// Builds a one-document MemoryIndex containing every doc-values flavor and checks
// what each doc-values reader returns for document 0.
public void testDocValues() throws Exception {
    final Document document = new Document();
    document.add(new NumericDocValuesField("numeric", 29L));
    // Added unsorted and with a duplicate (32).
    for (long value : new long[] {33L, 32L, 32L, 31L, 30L}) {
        document.add(new SortedNumericDocValuesField("sorted_numeric", value));
    }
    document.add(new BinaryDocValuesField("binary", new BytesRef("a")));
    document.add(new SortedDocValuesField("sorted", new BytesRef("b")));
    // Added unsorted and with a duplicate ("d").
    for (String value : new String[] {"f", "d", "d", "c"}) {
        document.add(new SortedSetDocValuesField("sorted_set", new BytesRef(value)));
    }

    final MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);
    final LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();

    final NumericDocValues numeric = reader.getNumericDocValues("numeric");
    assertEquals(0, numeric.nextDoc());
    assertEquals(29L, numeric.longValue());

    // All five values come back in ascending order, duplicate included.
    final SortedNumericDocValues sortedNumeric = reader.getSortedNumericDocValues("sorted_numeric");
    assertEquals(0, sortedNumeric.nextDoc());
    assertEquals(5, sortedNumeric.docValueCount());
    for (long expected : new long[] {30L, 31L, 32L, 32L, 33L}) {
        assertEquals(expected, sortedNumeric.nextValue());
    }

    final BinaryDocValues binary = reader.getBinaryDocValues("binary");
    assertEquals(0, binary.nextDoc());
    assertEquals("a", binary.binaryValue().utf8ToString());

    final SortedDocValues sorted = reader.getSortedDocValues("sorted");
    assertEquals(0, sorted.nextDoc());
    assertEquals("b", sorted.binaryValue().utf8ToString());
    assertEquals(0, sorted.ordValue());
    assertEquals("b", sorted.lookupOrd(0).utf8ToString());

    // Three distinct values are expected: the duplicate "d" is counted once.
    final SortedSetDocValues sortedSet = reader.getSortedSetDocValues("sorted_set");
    assertEquals(3, sortedSet.getValueCount());
    assertEquals(0, sortedSet.nextDoc());
    for (long expectedOrd = 0L; expectedOrd < 3L; expectedOrd++) {
        assertEquals(expectedOrd, sortedSet.nextOrd());
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals("c", sortedSet.lookupOrd(0L).utf8ToString());
    assertEquals("d", sortedSet.lookupOrd(1L).utf8ToString());
    assertEquals("f", sortedSet.lookupOrd(2L).utf8ToString());
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 25 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCacheWithThreads method test.

// Indexes random numeric, binary and sorted doc values, then has several threads
// concurrently read them back through FieldCache.DEFAULT and compare each value
// against what was indexed.
public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    // Expected values, in the order the documents are added (position i belongs to the i-th document).
    final List<Long> numbers = new ArrayList<>();
    final List<BytesRef> binary = new ArrayList<>();
    final List<BytesRef> sorted = new ArrayList<>();
    final int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; i++) {
        Document d = new Document();
        long number = random().nextLong();
        d.add(new NumericDocValuesField("number", number));
        BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
        d.add(new BinaryDocValuesField("bytes", bytes));
        binary.add(bytes);
        bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
        d.add(new SortedDocValuesField("sorted", bytes));
        sorted.add(bytes);
        w.addDocument(d);
        numbers.add(number);
    }
    // Merge down to one segment; the single-leaf assumption is asserted just below.
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    assertEquals(1, r.leaves().size());
    final LeafReader ar = r.leaves().get(0).reader();
    int numThreads = TestUtil.nextInt(random(), 2, 5);
    List<Thread> threads = new ArrayList<>();
    // Every worker waits on this latch; it is released only after all threads have been started.
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int t = 0; t < numThreads; t++) {
        // Each thread gets its own Random, seeded from the test's random.
        final Random threadRandom = new Random(random().nextLong());
        Thread thread = new Thread() {

            @Override
            public void run() {
                try {
                    startingGun.await();
                    int iters = atLeast(1000);
                    for (int iter = 0; iter < iters; iter++) {
                        int docID = threadRandom.nextInt(numDocs);
                        // Pick one of the four point parsers at random; each case makes the same
                        // two assertions against the indexed long for this document.
                        switch(threadRandom.nextInt(4)) {
                            case 0:
                                {
                                    NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER);
                                    assertEquals(docID, values.advance(docID));
                                    assertEquals(numbers.get(docID).longValue(), values.longValue());
                                }
                                break;
                            case 1:
                                {
                                    NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER);
                                    assertEquals(docID, values.advance(docID));
                                    assertEquals(numbers.get(docID).longValue(), values.longValue());
                                }
                                break;
                            case 2:
                                {
                                    NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER);
                                    assertEquals(docID, values.advance(docID));
                                    assertEquals(numbers.get(docID).longValue(), values.longValue());
                                }
                                break;
                            case 3:
                                {
                                    NumericDocValues values = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER);
                                    assertEquals(docID, values.advance(docID));
                                    assertEquals(numbers.get(docID).longValue(), values.longValue());
                                }
                                break;
                        }
                        // The binary and sorted fields are checked on every iteration, whichever parser was picked.
                        BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes");
                        assertEquals(docID, bdv.advance(docID));
                        assertEquals(binary.get(docID), bdv.binaryValue());
                        SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
                        assertEquals(docID, sdv.advance(docID));
                        assertEquals(sorted.get(docID), sdv.binaryValue());
                    }
                } catch (Exception e) {
                    // Checked exceptions are rethrown unchecked because run() cannot declare them.
                    throw new RuntimeException(e);
                }
            }
        };
        thread.start();
        threads.add(thread);
    }
    // Release all workers at once, then wait for each of them to finish.
    startingGun.countDown();
    for (Thread thread : threads) {
        thread.join();
    }
    r.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Random(java.util.Random) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) LeafReader(org.apache.lucene.index.LeafReader) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IOException(java.io.IOException) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader)

Aggregations

BinaryDocValues (org.apache.lucene.index.BinaryDocValues)37 BytesRef (org.apache.lucene.util.BytesRef)29 Document (org.apache.lucene.document.Document)13 LeafReader (org.apache.lucene.index.LeafReader)12 SortedDocValues (org.apache.lucene.index.SortedDocValues)12 NumericDocValues (org.apache.lucene.index.NumericDocValues)11 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)11 Directory (org.apache.lucene.store.Directory)10 ArrayList (java.util.ArrayList)9 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)9 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)7 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)6 Bits (org.apache.lucene.util.Bits)6 IOException (java.io.IOException)5 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)5 IndexReader (org.apache.lucene.index.IndexReader)5 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)5