Search in sources :

Example 11 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestJoinUtil method createContext.

private IndexIterationContext createContext(int nDocs, boolean multipleValuesPerDocument, boolean globalOrdinalJoin) throws IOException {
    if (globalOrdinalJoin) {
        assertFalse("ordinal join doesn't support multiple join values per document", multipleValuesPerDocument);
    }
    Directory dir = newDirectory();
    final Random random = random();
    RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / RandomNumbers.randomIntBetween(random, 1, 4);
    context.randomUniqueValues = new String[numRandomValues];
    Set<String> trackSet = new HashSet<>();
    context.randomFrom = new boolean[numRandomValues];
    for (int i = 0; i < numRandomValues; i++) {
        String uniqueRandomValue;
        do {
            // the trick is to generate values which will be ordered similarly for string, ints&longs, positive nums makes it easier
            final int nextInt = random.nextInt(Integer.MAX_VALUE);
            uniqueRandomValue = String.format(Locale.ROOT, "%08x", nextInt);
            assert nextInt == Integer.parseUnsignedInt(uniqueRandomValue, 16);
        } while ("".equals(uniqueRandomValue) || trackSet.contains(uniqueRandomValue));
        // Generate unique values and empty strings aren't allowed.
        trackSet.add(uniqueRandomValue);
        context.randomFrom[i] = random.nextBoolean();
        context.randomUniqueValues[i] = uniqueRandomValue;
    }
    List<String> randomUniqueValuesReplica = new ArrayList<>(Arrays.asList(context.randomUniqueValues));
    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++) {
        String id = Integer.toString(i);
        int randomI = random.nextInt(context.randomUniqueValues.length);
        String value = context.randomUniqueValues[randomI];
        Document document = new Document();
        document.add(newTextField(random, "id", id, Field.Store.YES));
        document.add(newTextField(random, "value", value, Field.Store.NO));
        boolean from = context.randomFrom[randomI];
        int numberOfLinkValues = multipleValuesPerDocument ? Math.min(2 + random.nextInt(10), context.randomUniqueValues.length) : 1;
        docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
        if (globalOrdinalJoin) {
            document.add(newStringField("type", from ? "from" : "to", Field.Store.NO));
        }
        final List<String> subValues;
        {
            int start = randomUniqueValuesReplica.size() == numberOfLinkValues ? 0 : random.nextInt(randomUniqueValuesReplica.size() - numberOfLinkValues);
            subValues = randomUniqueValuesReplica.subList(start, start + numberOfLinkValues);
            Collections.shuffle(subValues, random);
        }
        for (String linkValue : subValues) {
            assert !docs[i].linkValues.contains(linkValue);
            docs[i].linkValues.add(linkValue);
            if (from) {
                if (!context.fromDocuments.containsKey(linkValue)) {
                    context.fromDocuments.put(linkValue, new ArrayList<>());
                }
                if (!context.randomValueFromDocs.containsKey(value)) {
                    context.randomValueFromDocs.put(value, new ArrayList<>());
                }
                context.fromDocuments.get(linkValue).add(docs[i]);
                context.randomValueFromDocs.get(value).add(docs[i]);
                addLinkFields(random, document, "from", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
            } else {
                if (!context.toDocuments.containsKey(linkValue)) {
                    context.toDocuments.put(linkValue, new ArrayList<>());
                }
                if (!context.randomValueToDocs.containsKey(value)) {
                    context.randomValueToDocs.put(value, new ArrayList<>());
                }
                context.toDocuments.get(linkValue).add(docs[i]);
                context.randomValueToDocs.get(value).add(docs[i]);
                addLinkFields(random, document, "to", linkValue, multipleValuesPerDocument, globalOrdinalJoin);
            }
        }
        w.addDocument(document);
        if (random.nextInt(10) == 4) {
            w.commit();
        }
        if (VERBOSE) {
            System.out.println("Added document[" + docs[i].id + "]: " + document);
        }
    }
    if (random.nextBoolean()) {
        w.forceMerge(1);
    }
    w.close();
    // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
    // any ScoreMode.
    DirectoryReader topLevelReader = DirectoryReader.open(dir);
    IndexSearcher searcher = newSearcher(topLevelReader);
    for (int i = 0; i < context.randomUniqueValues.length; i++) {
        String uniqueRandomValue = context.randomUniqueValues[i];
        final String fromField;
        final String toField;
        final Map<String, Map<Integer, JoinScore>> queryVals;
        if (context.randomFrom[i]) {
            fromField = "from";
            toField = "to";
            queryVals = context.fromHitsToJoinScore;
        } else {
            fromField = "to";
            toField = "from";
            queryVals = context.toHitsToJoinScore;
        }
        final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<>();
        if (multipleValuesPerDocument) {
            searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {

                private Scorer scorer;

                private SortedSetDocValues docTermOrds;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > docTermOrds.docID()) {
                        docTermOrds.advance(doc);
                    }
                    if (doc == docTermOrds.docID()) {
                        long ord;
                        while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                            final BytesRef joinValue = docTermOrds.lookupOrd(ord);
                            JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                            if (joinScore == null) {
                                joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                            }
                            joinScore.addScore(scorer.score());
                        }
                    }
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    docTermOrds = DocValues.getSortedSet(context.reader(), fromField);
                }

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public boolean needsScores() {
                    return true;
                }
            });
        } else {
            searcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new SimpleCollector() {

                private Scorer scorer;

                private BinaryDocValues terms;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > terms.docID()) {
                        terms.advance(doc);
                    }
                    final BytesRef joinValue;
                    if (doc == terms.docID()) {
                        joinValue = terms.binaryValue();
                    } else {
                        // missing;
                        return;
                    }
                    JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                    if (joinScore == null) {
                        joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
                    }
                    if (VERBOSE) {
                        System.out.println("expected val=" + joinValue.utf8ToString() + " expected score=" + scorer.score());
                    }
                    joinScore.addScore(scorer.score());
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    terms = DocValues.getBinary(context.reader(), fromField);
                }

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public boolean needsScores() {
                    return true;
                }
            });
        }
        final Map<Integer, JoinScore> docToJoinScore = new HashMap<>();
        if (multipleValuesPerDocument) {
            Terms terms = MultiFields.getTerms(topLevelReader, toField);
            if (terms != null) {
                PostingsEnum postingsEnum = null;
                SortedSet<BytesRef> joinValues = new TreeSet<>();
                joinValues.addAll(joinValueToJoinScores.keySet());
                for (BytesRef joinValue : joinValues) {
                    TermsEnum termsEnum = terms.iterator();
                    if (termsEnum.seekExact(joinValue)) {
                        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                        JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                        for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
                            // Something to keep in mind for many-to-many relations.
                            if (!docToJoinScore.containsKey(doc)) {
                                docToJoinScore.put(doc, joinScore);
                            }
                        }
                    }
                }
            }
        } else {
            searcher.search(new MatchAllDocsQuery(), new SimpleCollector() {

                private BinaryDocValues terms;

                private int docBase;

                @Override
                public void collect(int doc) throws IOException {
                    if (doc > terms.docID()) {
                        terms.advance(doc);
                    }
                    final BytesRef joinValue;
                    if (doc == terms.docID()) {
                        joinValue = terms.binaryValue();
                    } else {
                        // missing;
                        joinValue = new BytesRef(BytesRef.EMPTY_BYTES);
                    }
                    JoinScore joinScore = joinValueToJoinScores.get(joinValue);
                    if (joinScore == null) {
                        return;
                    }
                    docToJoinScore.put(docBase + doc, joinScore);
                }

                @Override
                protected void doSetNextReader(LeafReaderContext context) throws IOException {
                    terms = DocValues.getBinary(context.reader(), toField);
                    docBase = context.docBase;
                }

                @Override
                public void setScorer(Scorer scorer) {
                }

                @Override
                public boolean needsScores() {
                    return false;
                }
            });
        }
        queryVals.put(uniqueRandomValue, docToJoinScore);
    }
    if (globalOrdinalJoin) {
        SortedDocValues[] values = new SortedDocValues[topLevelReader.leaves().size()];
        for (LeafReaderContext leadContext : topLevelReader.leaves()) {
            values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
        }
        context.ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
    }
    context.searcher = searcher;
    context.dir = dir;
    return context;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Scorer(org.apache.lucene.search.Scorer) FilterScorer(org.apache.lucene.search.FilterScorer) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum) SimpleCollector(org.apache.lucene.search.SimpleCollector) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) TreeSet(java.util.TreeSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Map(java.util.Map) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) HashMap(java.util.HashMap) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 12 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestMemoryIndex method testDocValuesDoNotAffectBoostPositionsOrOffset.

public void testDocValuesDoNotAffectBoostPositionsOrOffset() throws Exception {
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
    doc.add(new TextField("text", "quick brown fox", Field.Store.NO));
    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer, true, true);
    LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
    TermsEnum tenum = leafReader.terms("text").iterator();
    assertEquals("brown", tenum.next().utf8ToString());
    PostingsEnum penum = tenum.postings(null, PostingsEnum.OFFSETS);
    assertEquals(0, penum.nextDoc());
    assertEquals(1, penum.freq());
    assertEquals(1, penum.nextPosition());
    assertEquals(6, penum.startOffset());
    assertEquals(11, penum.endOffset());
    assertEquals("fox", tenum.next().utf8ToString());
    penum = tenum.postings(penum, PostingsEnum.OFFSETS);
    assertEquals(0, penum.nextDoc());
    assertEquals(1, penum.freq());
    assertEquals(2, penum.nextPosition());
    assertEquals(12, penum.startOffset());
    assertEquals(15, penum.endOffset());
    assertEquals("quick", tenum.next().utf8ToString());
    penum = tenum.postings(penum, PostingsEnum.OFFSETS);
    assertEquals(0, penum.nextDoc());
    assertEquals(1, penum.freq());
    assertEquals(0, penum.nextPosition());
    assertEquals(0, penum.startOffset());
    assertEquals(5, penum.endOffset());
    BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("text");
    assertEquals(0, binaryDocValues.nextDoc());
    assertEquals("quick brown fox", binaryDocValues.binaryValue().utf8ToString());
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) PostingsEnum(org.apache.lucene.index.PostingsEnum) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 13 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestFieldCache method testDocValuesIntegration.

public void testDocValuesIntegration() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.add(new NumericDocValuesField("numeric", 42));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);
    // Binary type: can be retrieved via getTerms()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER);
    });
    BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary");
    assertEquals(0, binary.nextDoc());
    final BytesRef term = binary.binaryValue();
    assertEquals("binary value", term.utf8ToString());
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "binary");
    });
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
    });
    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "binary");
    });
    Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
    assertTrue(bits.get(0));
    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER);
    });
    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sorted");
    });
    binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
    assertEquals(0, binary.nextDoc());
    BytesRef scratch = binary.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());
    SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
    assertEquals(0, sorted.nextDoc());
    assertEquals(0, sorted.ordValue());
    assertEquals(1, sorted.getValueCount());
    scratch = sorted.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());
    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(1, sortedSet.getValueCount());
    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
    assertTrue(bits.get(0));
    // Numeric type: can be retrieved via getInts() and so on
    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER);
    assertEquals(0, numeric.nextDoc());
    assertEquals(42, numeric.longValue());
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "numeric");
    });
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
    });
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
    });
    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "numeric");
    });
    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
    assertTrue(bits.get(0));
    // SortedSet type: can be retrieved via getDocTermOrds() 
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER);
    });
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "sortedset");
    });
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
    });
    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sortedset");
    });
    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(1, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(2, sortedSet.getValueCount());
    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
    assertTrue(bits.get(0));
    ir.close();
    dir.close();
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Bits(org.apache.lucene.util.Bits) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 14 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestLucene54DocValuesFormat method doTestSparseDocValuesVsStoredFields.

private void doTestSparseDocValuesVsStoredFields() throws Exception {
    final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = random().nextLong();
    }
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // sparse compression is only enabled if less than 1% of docs have a value
    final int avgGap = 100;
    final int numDocs = atLeast(200);
    for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
        writer.addDocument(new Document());
    }
    final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        // single-valued
        long docValue = values[random().nextInt(values.length)];
        doc.add(new NumericDocValuesField("numeric", docValue));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
        doc.add(new StoredField("value", docValue));
        // multi-valued
        final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
        for (int j = 0; j < numValues; ++j) {
            docValue = values[random().nextInt(values.length)];
            doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
            doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
            doc.add(new StoredField("values", docValue));
        }
        writer.addDocument(doc);
        // add a gap
        for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
            writer.addDocument(new Document());
        }
    }
    if (random().nextBoolean()) {
        writer.forceMerge(1);
    }
    final IndexReader indexReader = writer.getReader();
    TestUtil.checkReader(indexReader);
    writer.close();
    for (LeafReaderContext context : indexReader.leaves()) {
        final LeafReader reader = context.reader();
        final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
        final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
        final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
        final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
        final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
        for (int i = 0; i < reader.maxDoc(); ++i) {
            final Document doc = reader.document(i);
            final IndexableField valueField = doc.getField("value");
            final Long value = valueField == null ? null : valueField.numericValue().longValue();
            if (value == null) {
                assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
            } else {
                assertEquals(i, numeric.nextDoc());
                assertEquals(i, binary.nextDoc());
                assertEquals(i, sorted.nextDoc());
                assertEquals(value.longValue(), numeric.longValue());
                assertTrue(sorted.ordValue() >= 0);
                assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
                assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
            }
            final IndexableField[] valuesFields = doc.getFields("values");
            if (valuesFields.length == 0) {
                assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
            } else {
                final Set<Long> valueSet = new HashSet<>();
                for (IndexableField sf : valuesFields) {
                    valueSet.add(sf.numericValue().longValue());
                }
                assertEquals(i, sortedNumeric.nextDoc());
                assertEquals(valuesFields.length, sortedNumeric.docValueCount());
                for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
                    assertTrue(valueSet.contains(sortedNumeric.nextValue()));
                }
                assertEquals(i, sortedSet.nextDoc());
                int sortedSetCount = 0;
                while (true) {
                    long ord = sortedSet.nextOrd();
                    if (ord == SortedSetDocValues.NO_MORE_ORDS) {
                        break;
                    }
                    assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
                    sortedSetCount++;
                }
                assertEquals(valueSet.size(), sortedSetCount);
            }
        }
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SparseNumericDocValues(org.apache.lucene.codecs.lucene54.Lucene54DocValuesProducer.SparseNumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with BinaryDocValues

use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.

private void doTestSparseDocValuesVsStoredFields() throws Exception {
    final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = random().nextLong();
    }
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // sparse compression is only enabled if less than 1% of docs have a value
    final int avgGap = 100;
    final int numDocs = atLeast(200);
    for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
        writer.addDocument(new Document());
    }
    final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        // single-valued
        long docValue = values[random().nextInt(values.length)];
        doc.add(new NumericDocValuesField("numeric", docValue));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
        doc.add(new StoredField("value", docValue));
        // multi-valued
        final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
        for (int j = 0; j < numValues; ++j) {
            docValue = values[random().nextInt(values.length)];
            doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
            doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
            doc.add(new StoredField("values", docValue));
        }
        writer.addDocument(doc);
        // add a gap
        for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
            writer.addDocument(new Document());
        }
    }
    if (random().nextBoolean()) {
        writer.forceMerge(1);
    }
    final IndexReader indexReader = writer.getReader();
    writer.close();
    for (LeafReaderContext context : indexReader.leaves()) {
        final LeafReader reader = context.reader();
        final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
        final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
        final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
        final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
        final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
        for (int i = 0; i < reader.maxDoc(); ++i) {
            final Document doc = reader.document(i);
            final IndexableField valueField = doc.getField("value");
            final Long value = valueField == null ? null : valueField.numericValue().longValue();
            if (value == null) {
                assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
            } else {
                assertEquals(i, numeric.nextDoc());
                assertEquals(i, binary.nextDoc());
                assertEquals(i, sorted.nextDoc());
                assertEquals(value.longValue(), numeric.longValue());
                assertTrue(sorted.ordValue() >= 0);
                assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
                assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
            }
            final IndexableField[] valuesFields = doc.getFields("values");
            if (valuesFields.length == 0) {
                assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
            } else {
                final Set<Long> valueSet = new HashSet<>();
                for (IndexableField sf : valuesFields) {
                    valueSet.add(sf.numericValue().longValue());
                }
                assertEquals(i, sortedNumeric.nextDoc());
                assertEquals(valuesFields.length, sortedNumeric.docValueCount());
                for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
                    assertTrue(valueSet.contains(sortedNumeric.nextValue()));
                }
                assertEquals(i, sortedSet.nextDoc());
                int sortedSetCount = 0;
                while (true) {
                    long ord = sortedSet.nextOrd();
                    if (ord == SortedSetDocValues.NO_MORE_ORDS) {
                        break;
                    }
                    assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
                    sortedSetCount++;
                }
                assertEquals(valueSet.size(), sortedSetCount);
            }
        }
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

BinaryDocValues (org.apache.lucene.index.BinaryDocValues)37 BytesRef (org.apache.lucene.util.BytesRef)29 Document (org.apache.lucene.document.Document)13 LeafReader (org.apache.lucene.index.LeafReader)12 SortedDocValues (org.apache.lucene.index.SortedDocValues)12 NumericDocValues (org.apache.lucene.index.NumericDocValues)11 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)11 Directory (org.apache.lucene.store.Directory)10 ArrayList (java.util.ArrayList)9 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)9 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)7 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)6 Bits (org.apache.lucene.util.Bits)6 IOException (java.io.IOException)5 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)5 IndexReader (org.apache.lucene.index.IndexReader)5 SortedNumericDocValues (org.apache.lucene.index.SortedNumericDocValues)5