Search in sources:

Example 1 with SortField

Use of org.apache.lucene.search.SortField in project crate by crate.

From the class SortSymbolVisitor, method visitReference.

/**
 * Builds the Lucene {@link SortField} used to order results by a {@link Reference} symbol.
 * <p>
 * The approach follows the one taken by {@link org.elasticsearch.search.sort.SortParseElement}.
 *
 * @param symbol  the reference to sort on
 * @param context supplies the sort direction ({@code reverseFlag}), the null placement
 *                ({@code nullFirst}) and the context through which field data is looked up
 * @return the sort field for {@code symbol}
 */
@Override
public SortField visitReference(final Reference symbol, final SortSymbolContext context) {
    // The plain SortField(fieldName, type) constructor cannot be used: values are stored via
    // docValues, so Lucene sees them as binary rather than as the reference's valueType.
    // A custom comparator source, following the same logic as ES, is used instead.
    final ColumnIdent ident = symbol.ident().columnIdent();
    if (ident.isColumn()) {
        if (SortParseElement.SCORE_FIELD_NAME.equals(ident.name())) {
            if (context.reverseFlag) {
                return SortParseElement.SORT_SCORE;
            }
            return SORT_SCORE_REVERSE;
        }
        if (DocSysColumns.RAW.equals(ident) || DocSysColumns.ID.equals(ident)) {
            return customSortField(
                DocSysColumns.nameForLucene(ident),
                symbol,
                context,
                LUCENE_TYPE_MAP.get(symbol.valueType()),
                false);
        }
    }

    final MappedFieldType mappedType = fieldTypeLookup.get(ident.fqn());
    if (mappedType == null) {
        // No mapping is known for this column: sort on its fully qualified name with the
        // null comparator source.
        final String unmappedName = ident.fqn();
        final IndexFieldData.XFieldComparatorSource nullSource = new NullFieldComparatorSource(
            LUCENE_TYPE_MAP.get(symbol.valueType()), context.reverseFlag, context.nullFirst);
        return new SortField(unmappedName, nullSource, context.reverseFlag);
    }

    final String mappedName = mappedType.names().indexName();
    // For multi-valued fields, pick the maximum when sorting reversed and the minimum otherwise.
    final MultiValueMode multiValueMode;
    if (context.reverseFlag) {
        multiValueMode = MultiValueMode.MAX;
    } else {
        multiValueMode = MultiValueMode.MIN;
    }
    final IndexFieldData.XFieldComparatorSource mappedSource = context.context.fieldData()
        .getForField(mappedType)
        .comparatorSource(SortOrder.missing(context.reverseFlag, context.nullFirst), multiValueMode, null);
    return new SortField(mappedName, mappedSource, context.reverseFlag);
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexFieldData(org.elasticsearch.index.fielddata.IndexFieldData) SortField(org.apache.lucene.search.SortField) MultiValueMode(org.elasticsearch.search.MultiValueMode)

Example 2 with SortField

Use of org.apache.lucene.search.SortField in project elasticsearch by elastic.

From the class AbstractStringFieldDataTestCase, method testNestedSorting.

/**
 * Indexes randomly generated blocks of child documents followed by their parent, then checks
 * that sorting the parents by the "text" values of their children with a nested
 * {@link BytesRefFieldComparatorSource} returns them in non-decreasing order.
 *
 * @param sortMode how the values found in a parent's children are reduced to one sort key; the
 *                 verification below models {@link MultiValueMode#MIN} and {@link MultiValueMode#MAX}
 * @throws IOException if indexing, searching or closing the reader fails
 */
public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(random());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    // One bit per indexed document; the bit of every parent document is set.
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(random(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(random(), values);
        if (value != null) {
            addField(parent, "text", value);
        }
        // The parent goes last in its block, so its position is the previous parent's
        // position plus the size of this block.
        docs.add(parent);
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    // try-with-resources: the reader is closed even when an assertion below fails.
    try (DirectoryReader directoryReader = ElasticsearchDirectoryReader.wrap(
            DirectoryReader.open(writer), new ShardId(indexService.index(), 0))) {
        IndexSearcher searcher = new IndexSearcher(directoryReader);
        IndexFieldData<?> fieldData = getForField("text");
        final Object missingValue;
        switch (randomInt(4)) {
            case 0:
                missingValue = "_first";
                break;
            case 1:
                missingValue = "_last";
                break;
            case 2:
                missingValue = new BytesRef(RandomPicks.randomFrom(random(), values));
                break;
            default:
                missingValue = new BytesRef(TestUtil.randomSimpleString(random()));
                break;
        }
        Query parentFilter = new TermQuery(new Term("type", "parent"));
        Query childFilter = Queries.not(parentFilter);
        Nested nested = createNested(searcher, parentFilter, childFilter);
        BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode, nested);
        ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
        Sort sort = new Sort(new SortField("text", nestedComparatorSource));
        TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
        assertTrue(topDocs.scoreDocs.length > 0);
        BytesRef previous = null;
        for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
            final int docID = topDocs.scoreDocs[i].doc;
            assertTrue("expected " + docID + " to be a parent", parents.get(docID));
            // Recompute the expected sort key from the stored values of this parent's children,
            // i.e. the documents between the previous parent and this one.
            BytesRef cmpValue = null;
            for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
                for (String storedValue : searcher.doc(child).getValues("text")) {
                    final BytesRef value = new BytesRef(storedValue);
                    if (cmpValue == null) {
                        cmpValue = value;
                    } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                        cmpValue = value;
                    } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                        cmpValue = value;
                    }
                }
            }
            if (cmpValue == null) {
                // No child value: "_first" is modelled as the empty BytesRef, "_last" as null
                // (which skips the comparison below), anything else is the BytesRef itself.
                if ("_first".equals(missingValue)) {
                    cmpValue = new BytesRef();
                } else if ("_last".equals(missingValue) == false) {
                    cmpValue = (BytesRef) missingValue;
                }
            }
            if (previous != null && cmpValue != null) {
                assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
            }
            previous = cmpValue;
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) ArrayList(java.util.ArrayList) Nested(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) FixedBitSet(org.apache.lucene.util.FixedBitSet) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) BytesRefFieldComparatorSource(org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource) Term(org.apache.lucene.index.Term) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) StringField(org.apache.lucene.document.StringField)

Example 3 with SortField

Use of org.apache.lucene.search.SortField in project elasticsearch by elastic.

From the class AbstractStringFieldDataTestCase, method testSortMissing.

/**
 * Indexes random documents, some of them without the "value" field, and checks that a sort
 * using the "_first" / "_last" missing-value setting places the value-less documents at the
 * requested end while the remaining documents are ordered by value.
 *
 * @param first   {@code true} to request "_first" for missing values, {@code false} for "_last"
 * @param reverse whether the sort is reversed
 * @throws IOException if indexing, searching or closing the reader fails
 */
public void testSortMissing(boolean first, boolean reverse) throws IOException {
    // Index 0 is never assigned and stays null; picking it below yields a document without a value.
    final String[] values = new String[randomIntBetween(2, 10)];
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(random());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(random(), values);
        final Document d = new Document();
        if (value != null) {
            addField(d, "value", value);
        }
        writer.addDocument(d);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    final IndexFieldData<?> indexFieldData = getForField("value");
    // try-with-resources: the reader is closed even when an assertion below fails.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        XFieldComparatorSource comparator = indexFieldData.comparatorSource(first ? "_first" : "_last", MultiValueMode.MIN, null);
        TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(), randomBoolean() ? numDocs : randomIntBetween(10, numDocs), new Sort(new SortField("value", comparator, reverse)));
        assertEquals(numDocs, topDocs.totalHits);
        // Starting sentinel: null when missing values come first, otherwise a bound that every
        // real value satisfies for the chosen direction.
        BytesRef previousValue = first ? null : reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
        for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
            final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
            if (first && docValue == null) {
                // A missing value may only follow other missing values.
                assertNull(previousValue);
            } else if (!first && docValue != null) {
                // A real value may not follow a missing value.
                assertNotNull(previousValue);
            }
            final BytesRef value = docValue == null ? null : new BytesRef(docValue);
            if (previousValue != null && value != null) {
                if (reverse) {
                    assertTrue(previousValue.compareTo(value) >= 0);
                } else {
                    assertTrue(previousValue.compareTo(value) <= 0);
                }
            }
            previousValue = value;
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) GlobalOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) XFieldComparatorSource(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 4 with SortField

Use of org.apache.lucene.search.SortField in project elasticsearch by elastic.

From the class AbstractFieldDataImplTestCase, method testSortMultiValuesFields.

/**
 * Sorts the documents indexed by {@code fillExtendedMvSet()} on the "value" field, first
 * ascending with {@link MultiValueMode#MIN} and then reversed with {@link MultiValueMode#MAX},
 * and checks the resulting document order and sort values.
 * <p>
 * The expected documents and values depend on the fixture written by
 * {@code fillExtendedMvSet()}, which is not shown here.
 *
 * @throws Exception if indexing, searching or closing the reader fails
 */
public void testSortMultiValuesFields() throws Exception {
    fillExtendedMvSet();
    IndexFieldData<?> indexFieldData = getForField("value");
    // try-with-resources: the reader used to be left open; it is now always closed.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MIN, null))));
        assertThat(topDocs.totalHits, equalTo(8));
        assertThat(topDocs.scoreDocs.length, equalTo(8));
        assertHitDocAndSortValue(topDocs, 0, 7, "!08");
        assertHitDocAndSortValue(topDocs, 1, 0, "02");
        assertHitDocAndSortValue(topDocs, 2, 2, "03");
        assertHitDocAndSortValue(topDocs, 3, 3, "04");
        assertHitDocAndSortValue(topDocs, 4, 4, "06");
        assertHitDocAndSortValue(topDocs, 5, 6, "08");
        assertHitDocAndSortValue(topDocs, 6, 1, null);
        assertHitDocAndSortValue(topDocs, 7, 5, null);

        topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MAX, null), true)));
        assertThat(topDocs.totalHits, equalTo(8));
        assertThat(topDocs.scoreDocs.length, equalTo(8));
        assertHitDocAndSortValue(topDocs, 0, 6, "10");
        assertHitDocAndSortValue(topDocs, 1, 4, "08");
        assertHitDocAndSortValue(topDocs, 2, 3, "06");
        assertHitDocAndSortValue(topDocs, 3, 0, "04");
        assertHitDocAndSortValue(topDocs, 4, 2, "03");
        assertHitDocAndSortValue(topDocs, 5, 7, "!10");
        assertHitDocAndSortValue(topDocs, 6, 1, null);
        assertHitDocAndSortValue(topDocs, 7, 5, null);
    }
}

/** Asserts the document id and the first sort value of the hit at {@code rank}; a {@code null} expected value means the hit has no sort value. */
private void assertHitDocAndSortValue(TopFieldDocs topDocs, int rank, int expectedDoc, String expectedValue) {
    assertThat(topDocs.scoreDocs[rank].doc, equalTo(expectedDoc));
    final Object sortValue = ((FieldDoc) topDocs.scoreDocs[rank]).fields[0];
    if (expectedValue == null) {
        assertThat(sortValue, equalTo(null));
    } else {
        assertThat(((BytesRef) sortValue).utf8ToString(), equalTo(expectedValue));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery)

Example 5 with SortField

Use of org.apache.lucene.search.SortField in project elasticsearch by elastic.

From the class AbstractStringFieldDataTestCase, method testActualMissingValue.

/**
 * Indexes random documents, some of them without the "value" field, and checks that sorting
 * with an actual value configured as the missing value orders value-less documents as if they
 * carried that value.
 *
 * @param reverse whether the sort is reversed
 * @throws IOException if indexing, searching or closing the reader fails
 */
public void testActualMissingValue(boolean reverse) throws IOException {
    // missing value is set to an actual value
    // Index 0 is never assigned and stays null; picking it below yields a document without a value.
    final String[] values = new String[randomIntBetween(2, 30)];
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(random());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(random(), values);
        final Document d = new Document();
        if (value != null) {
            addField(d, "value", value);
        }
        writer.addDocument(d);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    final IndexFieldData<?> indexFieldData = getForField("value");
    // The array has at least two slots and slot 1 is always filled by the loop above.
    final String missingValue = values[1];
    // try-with-resources: the reader is closed even when an assertion below fails.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        XFieldComparatorSource comparator = indexFieldData.comparatorSource(missingValue, MultiValueMode.MIN, null);
        TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(), randomBoolean() ? numDocs : randomIntBetween(10, numDocs), new Sort(new SortField("value", comparator, reverse)));
        assertEquals(numDocs, topDocs.totalHits);
        // Starting sentinel that every real value satisfies for the chosen direction.
        BytesRef previousValue = reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
        for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
            final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
            // Documents without a stored value are compared as the configured missing value.
            final BytesRef value = new BytesRef(docValue == null ? missingValue : docValue);
            if (reverse) {
                assertTrue(previousValue.compareTo(value) >= 0);
            } else {
                assertTrue(previousValue.compareTo(value) <= 0);
            }
            previousValue = value;
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) GlobalOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) XFieldComparatorSource(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

SortField (org.apache.lucene.search.SortField): 230, Sort (org.apache.lucene.search.Sort): 174, Document (org.apache.lucene.document.Document): 116, Directory (org.apache.lucene.store.Directory): 110, IndexSearcher (org.apache.lucene.search.IndexSearcher): 90, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 75, TopDocs (org.apache.lucene.search.TopDocs): 74, IndexReader (org.apache.lucene.index.IndexReader): 65, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 62, SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField): 56, SortedSetSortField (org.apache.lucene.search.SortedSetSortField): 56, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 49, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 37, TermQuery (org.apache.lucene.search.TermQuery): 36, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 32, Query (org.apache.lucene.search.Query): 29, Term (org.apache.lucene.index.Term): 25, BytesRef (org.apache.lucene.util.BytesRef): 25, TopFieldDocs (org.apache.lucene.search.TopFieldDocs): 24, StoredField (org.apache.lucene.document.StoredField): 23