Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
Class SimpleTextDocValuesWriter, method addSortedField:
  @Override
  public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.SORTED;
    writeFieldEntry(field, DocValuesType.SORTED);

    int valueCount = 0;
    int maxLength = -1;
    TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
    for (BytesRef value = terms.next(); value != null; value = terms.next()) {
      maxLength = Math.max(maxLength, value.length);
      valueCount++;
    }

    // write numValues
    SimpleTextUtil.write(data, NUMVALUES);
    SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
    SimpleTextUtil.writeNewline(data);

    // write maxLength
    SimpleTextUtil.write(data, MAXLENGTH);
    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
    SimpleTextUtil.writeNewline(data);

    int maxBytesLength = Integer.toString(maxLength).length();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < maxBytesLength; i++) {
      sb.append('0');
    }

    // write our pattern for encoding lengths
    SimpleTextUtil.write(data, PATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));

    int maxOrdBytes = Long.toString(valueCount + 1L).length();
    sb.setLength(0);
    for (int i = 0; i < maxOrdBytes; i++) {
      sb.append('0');
    }

    // write our pattern for ords
    SimpleTextUtil.write(data, ORDPATTERN);
    SimpleTextUtil.write(data, sb.toString(), scratch);
    SimpleTextUtil.writeNewline(data);
    final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));

    // for asserts:
    int valuesSeen = 0;

    terms = valuesProducer.getSorted(field).termsEnum();
    for (BytesRef value = terms.next(); value != null; value = terms.next()) {
      // write length
      SimpleTextUtil.write(data, LENGTH);
      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
      SimpleTextUtil.writeNewline(data);

      // write bytes -- don't use SimpleText.write
      // because it escapes:
      data.writeBytes(value.bytes, value.offset, value.length);
      // pad to fit
      for (int i = value.length; i < maxLength; i++) {
        data.writeByte((byte) ' ');
      }
      SimpleTextUtil.writeNewline(data);
      valuesSeen++;
      assert valuesSeen <= valueCount;
    }
    assert valuesSeen == valueCount;

    SortedDocValues values = valuesProducer.getSorted(field);
    for (int i = 0; i < numDocs; ++i) {
      if (values.docID() < i) {
        values.nextDoc();
        assert values.docID() >= i;
      }
      int ord = -1;
      if (values.docID() == i) {
        ord = values.ordValue();
      }
      SimpleTextUtil.write(data, ordEncoder.format(ord + 1L), scratch);
      SimpleTextUtil.writeNewline(data);
    }
  }
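
For contrast with the writer above, here is a minimal, self-contained sketch of reading sorted doc values back through the same iterator-style API (nextDoc(), ordValue(), lookupOrd()). It is only a sketch: it assumes a Lucene 7.x-era setup (RAMDirectory, a null analyzer as in the doc-values-only tests further down), and the field name "color" and its values are made up for illustration.

  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.SortedDocValuesField;
  import org.apache.lucene.index.DirectoryReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.LeafReaderContext;
  import org.apache.lucene.index.SortedDocValues;
  import org.apache.lucene.search.DocIdSetIterator;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.util.BytesRef;

  public class SortedDocValuesReadSketch {
    public static void main(String[] args) throws Exception {
      Directory dir = new RAMDirectory();
      // A null analyzer is fine here because only doc-values fields are added.
      IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
      for (String color : new String[] { "red", "green", "red" }) {
        Document doc = new Document();
        doc.add(new SortedDocValuesField("color", new BytesRef(color)));
        iw.addDocument(doc);
      }
      iw.close();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        for (LeafReaderContext ctx : reader.leaves()) {
          SortedDocValues dv = ctx.reader().getSortedDocValues("color");
          if (dv == null) {
            continue; // no document in this segment has the field
          }
          // The iterator only visits documents that actually have a value.
          for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
            int ord = dv.ordValue();            // per-segment ordinal
            BytesRef value = dv.lookupOrd(ord); // ordinal back to term bytes
            System.out.println("doc " + doc + " -> ord " + ord + " -> " + value.utf8ToString());
          }
        }
      }
      dir.close();
    }
  }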
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
Class TestCollationDocValuesField, method doTestRanges:
  private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
    SortedDocValues dvs = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
    for (int docID = 0; docID < is.getIndexReader().maxDoc(); docID++) {
      Document doc = is.doc(docID);
      String s = doc.getField("field").stringValue();
      boolean collatorAccepts = collate(collator, s, startPoint) >= 0 && collate(collator, s, endPoint) <= 0;
      assertEquals(docID, dvs.nextDoc());
      BytesRef br = dvs.binaryValue();
      boolean luceneAccepts = br.compareTo(startBR) >= 0 && br.compareTo(endBR) <= 0;
      assertEquals(startPoint + " <= " + s + " <= " + endPoint, collatorAccepts, luceneAccepts);
    }
  }
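
The comparison above only works because the bytes stored in the "collated" doc values are binary collation keys, which sort the same way the Collator compares the original strings. Here is a small sketch of producing such a key with the plain JDK Collator (the class under test, CollationDocValuesField, does this for you; the locale, strength, and sample value below are arbitrary choices for illustration):

  import java.text.Collator;
  import java.util.Locale;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.SortedDocValuesField;
  import org.apache.lucene.util.BytesRef;

  public class CollationKeySketch {
    public static void main(String[] args) {
      // Locale and strength are arbitrary for this sketch.
      Collator collator = Collator.getInstance(Locale.GERMAN);
      collator.setStrength(Collator.PRIMARY);

      // The binary collation key orders byte-wise the same way collator.compare()
      // orders the strings, so range checks on the BytesRef agree with the Collator.
      String value = "Äpfel";
      byte[] key = collator.getCollationKey(value).toByteArray();

      Document doc = new Document();
      doc.add(new SortedDocValuesField("collated", new BytesRef(key)));
      System.out.println("collation key has " + key.length + " bytes");
    }
  }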
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class ParentToChildrenAggregator, method doPostCollection:
  @Override
  protected void doPostCollection() throws IOException {
    IndexReader indexReader = context().searcher().getIndexReader();
    for (LeafReaderContext ctx : indexReader.leaves()) {
      Scorer childDocsScorer = childFilter.scorer(ctx);
      if (childDocsScorer == null) {
        continue;
      }
      DocIdSetIterator childDocsIter = childDocsScorer.iterator();
      final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
      final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);
      // Set the scorer, since we now replay only the child docIds
      sub.setScorer(new ConstantScoreScorer(null, 1f, childDocsIter));
      final Bits liveDocs = ctx.reader().getLiveDocs();
      for (int docId = childDocsIter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = childDocsIter.nextDoc()) {
        if (liveDocs != null && liveDocs.get(docId) == false) {
          continue;
        }
        long globalOrdinal = globalOrdinals.getOrd(docId);
        if (globalOrdinal != -1) {
          long bucketOrd = parentOrdToBuckets.get(globalOrdinal);
          if (bucketOrd != -1) {
            collectBucket(sub, docId, bucketOrd);
            if (multipleBucketsPerParentOrd) {
              long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
              if (otherBucketOrds != null) {
                for (long otherBucketOrd : otherBucketOrds) {
                  collectBucket(sub, docId, otherBucketOrd);
                }
              }
            }
          }
        }
      }
    }
  }
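
The key step above is a two-level lookup: child doc -> parent global ordinal -> bucket ordinal. Below is a simplified, standalone sketch of that bookkeeping using plain java.util collections. Every name in it is hypothetical; the real aggregator keeps these mappings in Elasticsearch's own paged array structures rather than a HashMap.

  import java.util.HashMap;
  import java.util.Map;

  public class ParentOrdToBucketSketch {

    private final Map<Long, Long> parentOrdToBucket = new HashMap<>();

    // While collecting parent documents: remember which bucket a parent's global ordinal fell into.
    public void onParentDoc(long parentGlobalOrdinal, long bucketOrd) {
      parentOrdToBucket.putIfAbsent(parentGlobalOrdinal, bucketOrd);
    }

    // While replaying child documents: resolve the child's parent ordinal to a bucket, or -1 if none.
    public long bucketForChild(long parentGlobalOrdinal) {
      return parentOrdToBucket.getOrDefault(parentGlobalOrdinal, -1L);
    }

    public static void main(String[] args) {
      ParentOrdToBucketSketch sketch = new ParentOrdToBucketSketch();
      sketch.onParentDoc(42L, 0L);                      // parent ordinal 42 belongs to bucket 0
      System.out.println(sketch.bucketForChild(42L));   // 0
      System.out.println(sketch.bucketForChild(7L));    // -1, no bucket for this parent
    }
  }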
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class ReplaceMissingTests, method test:
  public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);

    Document doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("cat")));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new SortedDocValuesField("field", new BytesRef("dog")));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(reader);

    SortedDocValues raw = ar.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());

    // existing values
    SortedDocValues dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog"));
    assertEquals(2, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    // non-existing values
    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple"));
    assertEquals(3, dv.getValueCount());
    assertEquals("apple", dv.lookupOrd(0).utf8ToString());
    assertEquals("cat", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(1, dv.getOrd(0));
    assertEquals(0, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("company", dv.lookupOrd(1).utf8ToString());
    assertEquals("dog", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(1, dv.getOrd(1));
    assertEquals(2, dv.getOrd(2));

    dv = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay"));
    assertEquals(3, dv.getValueCount());
    assertEquals("cat", dv.lookupOrd(0).utf8ToString());
    assertEquals("dog", dv.lookupOrd(1).utf8ToString());
    assertEquals("ebay", dv.lookupOrd(2).utf8ToString());
    assertEquals(0, dv.getOrd(0));
    assertEquals(2, dv.getOrd(1));
    assertEquals(1, dv.getOrd(2));

    reader.close();
    dir.close();
  }
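
A compact way to see what the assertions above are checking: ReplaceMissing splices the missing value into the existing term dictionary and shifts ordinals around the insertion point. The helper below is an illustrative sketch of that remapping only; the method name and parameters are hypothetical, not the actual ReplaceMissing API.

  // rawOrd: the document's ordinal in the raw dictionary, or -1 if it has no value.
  // insertedOrd: where the missing value sorts into the raw dictionary.
  // alreadyExists: whether the missing value was already a term.
  static int remapOrd(int rawOrd, int insertedOrd, boolean alreadyExists) {
    if (rawOrd < 0) {
      return insertedOrd;          // document had no value: it gets the substitute's ordinal
    }
    if (alreadyExists || rawOrd < insertedOrd) {
      return rawOrd;               // ordinals below the insertion point are unchanged
    }
    return rawOrd + 1;             // ordinals at or above it shift to make room for the new term
  }

  // With the raw dictionary {cat=0, dog=1} and missing value "apple" (insertedOrd=0, not present):
  // remapOrd(0, 0, false) == 1 for the "cat" doc, remapOrd(-1, 0, false) == 0 for the empty doc,
  // and remapOrd(1, 0, false) == 2 for the "dog" doc, matching the asserts in the test.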
Use of org.apache.lucene.index.SortedDocValues in project elasticsearch by elastic.
Class MultiOrdinalsTests, method testRandomValues:
  public void testRandomValues() throws IOException {
    Random random = random();
    int numDocs = 100 + random.nextInt(1000);
    int numOrdinals = 1 + random.nextInt(200);
    int numValues = 100 + random.nextInt(100000);
    OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
    Set<OrdAndId> ordsAndIdSet = new HashSet<>();
    for (int i = 0; i < numValues; i++) {
      ordsAndIdSet.add(new OrdAndId(random.nextInt(numOrdinals), random.nextInt(numDocs)));
    }
    List<OrdAndId> ordsAndIds = new ArrayList<>(ordsAndIdSet);
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

      @Override
      public int compare(OrdAndId o1, OrdAndId o2) {
        if (o1.ord < o2.ord) {
          return -1;
        }
        if (o1.ord == o2.ord) {
          if (o1.id < o2.id) {
            return -1;
          }
          if (o1.id > o2.id) {
            return 1;
          }
          return 0;
        }
        return 1;
      }
    });
    long lastOrd = -1;
    for (OrdAndId ordAndId : ordsAndIds) {
      if (lastOrd != ordAndId.ord) {
        lastOrd = ordAndId.ord;
        builder.nextOrdinal();
      }
      // remap the ordinals in case we have gaps?
      ordAndId.ord = builder.currentOrdinal();
      builder.addDoc(ordAndId.id);
    }
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

      @Override
      public int compare(OrdAndId o1, OrdAndId o2) {
        if (o1.id < o2.id) {
          return -1;
        }
        if (o1.id == o2.id) {
          if (o1.ord < o2.ord) {
            return -1;
          }
          if (o1.ord > o2.ord) {
            return 1;
          }
          return 0;
        }
        return 1;
      }
    });
    Ordinals ords = creationMultiOrdinals(builder);
    RandomAccessOrds docs = ords.ordinals();
    final SortedDocValues singleOrds = MultiValueMode.MIN.select(docs);
    int docId = ordsAndIds.get(0).id;
    List<Long> docOrds = new ArrayList<>();
    for (OrdAndId ordAndId : ordsAndIds) {
      if (docId == ordAndId.id) {
        docOrds.add(ordAndId.ord);
      } else {
        if (!docOrds.isEmpty()) {
          assertThat((long) singleOrds.getOrd(docId), equalTo(docOrds.get(0)));
          docs.setDocument(docId);
          final int numOrds = docs.cardinality();
          assertThat(numOrds, equalTo(docOrds.size()));
          for (int i = 0; i < numOrds; i++) {
            assertThat(docs.nextOrd(), equalTo(docOrds.get(i)));
          }
          final long[] array = new long[docOrds.size()];
          for (int i = 0; i < array.length; i++) {
            array[i] = docOrds.get(i);
          }
          assertIter(docs, docId, array);
        }
        for (int i = docId + 1; i < ordAndId.id; i++) {
          assertThat((long) singleOrds.getOrd(i), equalTo(RandomAccessOrds.NO_MORE_ORDS));
        }
        docId = ordAndId.id;
        docOrds.clear();
        docOrds.add(ordAndId.ord);
      }
    }
  }
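
The assertion on singleOrds relies on MultiValueMode.MIN.select collapsing each document's set of ordinals to its smallest one, and reporting a missing marker for documents with no values (the test compares against RandomAccessOrds.NO_MORE_ORDS; -1 is used as a stand-in below). Here is a standalone sketch of that selection rule, with a plain List as a hypothetical stand-in for the RandomAccessOrds view:

  import java.util.Arrays;
  import java.util.List;

  public class MinOrdinalSketch {

    // Collapse a document's ordinals to the smallest one, or -1 if it has none.
    static long minOrd(List<Long> ordsForDoc) {
      if (ordsForDoc == null || ordsForDoc.isEmpty()) {
        return -1L;                     // no value for this document
      }
      long min = ordsForDoc.get(0);
      for (long ord : ordsForDoc) {
        min = Math.min(min, ord);
      }
      return min;
    }

    public static void main(String[] args) {
      System.out.println(minOrd(Arrays.asList(3L, 7L, 5L)));  // 3
      System.out.println(minOrd(Arrays.<Long>asList()));      // -1 (missing)
    }
  }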