Search in sources :

Example 11 with RandomAccessOrds

Use of org.apache.lucene.index.RandomAccessOrds in the project elasticsearch by elastic.

From the class MultiOrdinalsTests, method testRandomValues:

/**
 * Feeds a random set of (ordinal, document) pairs into an {@code OrdinalsBuilder} and checks that
 * the ordinals read back per document match what was added.
 *
 * <p>For every document that received at least one ordinal, the test checks the single-valued
 * {@code MultiValueMode.MIN} view, the cardinality, the sequence returned by {@code nextOrd()} and
 * the {@code assertIter} helper. Documents lying between two consecutive populated documents are
 * expected to report no ordinal in the single-valued view.
 *
 * @throws IOException declared by the original test signature
 */
public void testRandomValues() throws IOException {
    // Keep the order of random draws unchanged so seeded runs stay reproducible.
    Random random = random();
    int numDocs = 100 + random.nextInt(1000);
    int numOrdinals = 1 + random.nextInt(200);
    int numValues = 100 + random.nextInt(100000);
    OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
    // NOTE(review): collecting into a Set presumably de-duplicates equal (ord, id) pairs;
    // that relies on OrdAndId's equals/hashCode, which is not visible here.
    Set<OrdAndId> ordsAndIdSet = new HashSet<>();
    for (int i = 0; i < numValues; i++) {
        ordsAndIdSet.add(new OrdAndId(random.nextInt(numOrdinals), random.nextInt(numDocs)));
    }
    List<OrdAndId> ordsAndIds = new ArrayList<>(ordsAndIdSet);

    // Order by ordinal first, then by document id, so the builder is fed one ordinal at a time.
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

        @Override
        public int compare(OrdAndId o1, OrdAndId o2) {
            final int byOrd = Long.compare(o1.ord, o2.ord);
            return byOrd != 0 ? byOrd : Integer.compare(o1.id, o2.id);
        }
    });
    long lastOrd = -1;
    for (OrdAndId ordAndId : ordsAndIds) {
        if (lastOrd != ordAndId.ord) {
            lastOrd = ordAndId.ord;
            builder.nextOrdinal();
        }
        // The random ordinals may contain gaps; overwrite each one with the ordinal the builder
        // actually assigned so the expectations below line up with the built structure.
        ordAndId.ord = builder.currentOrdinal();
        builder.addDoc(ordAndId.id);
    }

    // Re-order by document id, then ordinal, so each document's ordinals form one ascending run.
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {

        @Override
        public int compare(OrdAndId o1, OrdAndId o2) {
            final int byId = Integer.compare(o1.id, o2.id);
            return byId != 0 ? byId : Long.compare(o1.ord, o2.ord);
        }
    });
    Ordinals ords = creationMultiOrdinals(builder);
    RandomAccessOrds docs = ords.ordinals();
    final SortedDocValues singleOrds = MultiValueMode.MIN.select(docs);
    int docId = ordsAndIds.get(0).id;
    List<Long> docOrds = new ArrayList<>();
    final int total = ordsAndIds.size();
    for (int pos = 0; pos < total; pos++) {
        docOrds.add(ordsAndIds.get(pos).ord);
        final boolean hasNext = pos + 1 < total;
        if (hasNext && ordsAndIds.get(pos + 1).id == docId) {
            // Still inside the run of entries belonging to the current document.
            continue;
        }

        // End of the current document's run. This also triggers for the very last document,
        // which the previous version of this loop collected but never asserted on.
        // docOrds is ascending, so its first element is the expected MIN-mode ordinal.
        assertThat((long) singleOrds.getOrd(docId), equalTo(docOrds.get(0)));
        docs.setDocument(docId);
        final int numOrds = docs.cardinality();
        assertThat(numOrds, equalTo(docOrds.size()));
        for (int i = 0; i < numOrds; i++) {
            assertThat(docs.nextOrd(), equalTo(docOrds.get(i)));
        }
        final long[] array = new long[docOrds.size()];
        for (int i = 0; i < array.length; i++) {
            array[i] = docOrds.get(i);
        }
        assertIter(docs, docId, array);

        if (hasNext) {
            final int nextDocId = ordsAndIds.get(pos + 1).id;
            // Documents strictly between two populated documents received no ordinal at all.
            for (int i = docId + 1; i < nextDocId; i++) {
                assertThat((long) singleOrds.getOrd(i), equalTo(RandomAccessOrds.NO_MORE_ORDS));
            }
            docId = nextDocId;
            docOrds.clear();
        }
    }
}
Also used : RandomAccessOrds(org.apache.lucene.index.RandomAccessOrds) ArrayList(java.util.ArrayList) SortedDocValues(org.apache.lucene.index.SortedDocValues) Random(java.util.Random) HashSet(java.util.HashSet)

Example 12 with RandomAccessOrds

Use of org.apache.lucene.index.RandomAccessOrds in the project elasticsearch by elastic.

From the class FilterFieldDataTests, method testFilterByFrequency:

/**
 * Indexes 1000 documents with three string fields of differing term frequencies and checks which
 * terms are exposed by field data loaded under various {@code fielddataFrequencyFilter} settings.
 *
 * <p>Term distribution produced by the indexing loop below:
 * <ul>
 *   <li>{@code "100"} is added to 10 documents (every 100th) in all three fields;</li>
 *   <li>{@code "10"} is added to 100 documents (every 10th) in {@code high_freq} and {@code med_freq};</li>
 *   <li>{@code "5"} is added to 200 documents (every 5th) in {@code high_freq} only.</li>
 * </ul>
 *
 * <p>NOTE(review): the three filter arguments are presumably (min frequency, max frequency,
 * min segment size), with values below 1 read as a fraction of documents; confirm against the
 * {@code TextFieldMapper.Builder} API.
 *
 * @throws Exception if indexing or field data loading fails
 */
public void testFilterByFrequency() throws Exception {
    Random random = random();
    for (int i = 0; i < 1000; i++) {
        Document d = new Document();
        d.add(new StringField("id", "" + i, Field.Store.NO));
        if (i % 100 == 0) {
            d.add(new StringField("high_freq", "100", Field.Store.NO));
            d.add(new StringField("low_freq", "100", Field.Store.NO));
            d.add(new StringField("med_freq", "100", Field.Store.NO));
        }
        if (i % 10 == 0) {
            d.add(new StringField("high_freq", "10", Field.Store.NO));
            d.add(new StringField("med_freq", "10", Field.Store.NO));
        }
        if (i % 5 == 0) {
            d.add(new StringField("high_freq", "5", Field.Store.NO));
        }
        writer.addDocument(d);
    }
    // Merge down to one segment before refreshing the reader.
    writer.forceMerge(1, true);
    List<LeafReaderContext> contexts = refreshReader();
    final BuilderContext builderCtx = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1));
    {
        // high_freq with an upper bound only: "5" is expected to be dropped, "10" and "100" kept.
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("high_freq").fielddata(true).fielddataFrequencyFilter(0, random.nextBoolean() ? 100 : 0.5d, 0).build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        for (LeafReaderContext context : contexts) {
            AtomicOrdinalsFieldData loadDirect = fieldData.loadDirect(context);
            RandomAccessOrds bytesValues = loadDirect.getOrdinalsValues();
            // Actual value first, matcher second, so a failure message reports the right sides.
            assertThat(bytesValues.getValueCount(), equalTo(2L));
            assertThat(bytesValues.lookupOrd(0).utf8ToString(), equalTo("10"));
            assertThat(bytesValues.lookupOrd(1).utf8ToString(), equalTo("100"));
        }
    }
    {
        // high_freq with both a lower and an upper bound: only "5" is expected to remain.
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("high_freq").fielddata(true).fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, 201, 100).build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        for (LeafReaderContext context : contexts) {
            AtomicOrdinalsFieldData loadDirect = fieldData.loadDirect(context);
            RandomAccessOrds bytesValues = loadDirect.getOrdinalsValues();
            assertThat(bytesValues.getValueCount(), equalTo(1L));
            assertThat(bytesValues.lookupOrd(0).utf8ToString(), equalTo("5"));
        }
    }
    {
        // test # docs with value
        // NOTE(review): med_freq has values in 100 documents while the third filter argument is
        // 101, so the filter is presumably skipped and both terms survive; verify.
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("med_freq").fielddata(true).fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, Integer.MAX_VALUE, 101).build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        for (LeafReaderContext context : contexts) {
            AtomicOrdinalsFieldData loadDirect = fieldData.loadDirect(context);
            RandomAccessOrds bytesValues = loadDirect.getOrdinalsValues();
            assertThat(bytesValues.getValueCount(), equalTo(2L));
            assertThat(bytesValues.lookupOrd(0).utf8ToString(), equalTo("10"));
            assertThat(bytesValues.lookupOrd(1).utf8ToString(), equalTo("100"));
        }
    }
    {
        // NOTE(review): this scenario repeats the previous one with the same field and filter
        // arguments; kept as-is, but it may be a leftover that was meant to differ.
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("med_freq").fielddata(true).fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, Integer.MAX_VALUE, 101).build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        for (LeafReaderContext context : contexts) {
            AtomicOrdinalsFieldData loadDirect = fieldData.loadDirect(context);
            RandomAccessOrds bytesValues = loadDirect.getOrdinalsValues();
            assertThat(bytesValues.getValueCount(), equalTo(2L));
            assertThat(bytesValues.lookupOrd(0).utf8ToString(), equalTo("10"));
            assertThat(bytesValues.lookupOrd(1).utf8ToString(), equalTo("100"));
        }
    }
}
Also used : RandomAccessOrds(org.apache.lucene.index.RandomAccessOrds) ContentPath(org.elasticsearch.index.mapper.ContentPath) Document(org.apache.lucene.document.Document) Random(java.util.Random) StringField(org.apache.lucene.document.StringField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BuilderContext(org.elasticsearch.index.mapper.Mapper.BuilderContext) TextFieldMapper(org.elasticsearch.index.mapper.TextFieldMapper)

Aggregations

RandomAccessOrds (org.apache.lucene.index.RandomAccessOrds)12 SortedDocValues (org.apache.lucene.index.SortedDocValues)4 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)3 BytesRef (org.apache.lucene.util.BytesRef)3 HashSet (java.util.HashSet)2 Random (java.util.Random)2 FixedBitSet (org.apache.lucene.util.FixedBitSet)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 TreeSet (java.util.TreeSet)1 Document (org.apache.lucene.document.Document)1 StringField (org.apache.lucene.document.StringField)1 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)1 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)1 Scorer (org.apache.lucene.search.Scorer)1 BitSet (org.apache.lucene.util.BitSet)1 LongBitSet (org.apache.lucene.util.LongBitSet)1 GeoPoint (org.elasticsearch.common.geo.GeoPoint)1 AbstractRandomAccessOrds (org.elasticsearch.index.fielddata.AbstractRandomAccessOrds)1