Search in sources :

Example 26 with SortField

use of org.apache.lucene.search.SortField in project lucene-solr by apache.

the class TestBackwardsCompatibility method testCreateSortedIndex.

// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
public void testCreateSortedIndex() throws Exception {
    Path indexDir = getIndexDir().resolve("sorted");
    Files.deleteIfExists(indexDir);
    Directory dir = newFSDirectory(indexDir);
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setNoCFSRatio(1.0);
    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    // TODO: remove randomness
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMergePolicy(mp);
    conf.setUseCompoundFile(false);
    conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
    IndexWriter writer = new IndexWriter(dir, conf);
    LineFileDocs docs = new LineFileDocs(random());
    SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
    parser.setTimeZone(TimeZone.getTimeZone("UTC"));
    ParsePosition position = new ParsePosition(0);
    Field dateDVField = null;
    for (int i = 0; i < 50; i++) {
        Document doc = docs.nextDoc();
        String dateString = doc.get("date");
        position.setIndex(0);
        Date date = parser.parse(dateString, position);
        if (position.getErrorIndex() != -1) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (position.getIndex() != dateString.length()) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (dateDVField == null) {
            dateDVField = new NumericDocValuesField("dateDV", 0l);
            doc.add(dateDVField);
        }
        dateDVField.setLongValue(date.getTime());
        if (i == 250) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    writer.close();
    dir.close();
}
Also used : Path(java.nio.file.Path) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) Date(java.util.Date) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) SimpleDateFormat(java.text.SimpleDateFormat) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) LineFileDocs(org.apache.lucene.util.LineFileDocs) ParsePosition(java.text.ParsePosition)

Example 27 with SortField

use of org.apache.lucene.search.SortField in project lucene-solr by apache.

the class DefaultIndexingChain method maybeSortSegment.

private Sorter.DocMap maybeSortSegment(SegmentWriteState state) throws IOException {
    Sort indexSort = state.segmentInfo.getIndexSort();
    if (indexSort == null) {
        return null;
    }
    List<Sorter.DocComparator> comparators = new ArrayList<>();
    for (int i = 0; i < indexSort.getSort().length; i++) {
        SortField sortField = indexSort.getSort()[i];
        PerField perField = getPerField(sortField.getField());
        if (perField != null && perField.docValuesWriter != null && finishedDocValues.contains(perField.fieldInfo.name) == false) {
            perField.docValuesWriter.finish(state.segmentInfo.maxDoc());
            Sorter.DocComparator cmp = perField.docValuesWriter.getDocComparator(state.segmentInfo.maxDoc(), sortField);
            comparators.add(cmp);
            finishedDocValues.add(perField.fieldInfo.name);
        } else {
        // safe to ignore, sort field with no values or already seen before
        }
    }
    Sorter sorter = new Sorter(indexSort);
    // returns null if the documents are already sorted
    return sorter.sort(state.segmentInfo.maxDoc(), comparators.toArray(new Sorter.DocComparator[comparators.size()]));
}
Also used : ArrayList(java.util.ArrayList) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField)

Example 28 with SortField

use of org.apache.lucene.search.SortField in project lucene-solr by apache.

the class SimpleTextSegmentInfoFormat method write.

@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
    String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    try (IndexOutput output = dir.createOutput(segFileName, ioContext)) {
        // Only add the file once we've successfully created it, else IFD assert can trip:
        si.addFile(segFileName);
        BytesRefBuilder scratch = new BytesRefBuilder();
        SimpleTextUtil.write(output, SI_VERSION);
        SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
        SimpleTextUtil.writeNewline(output);
        SimpleTextUtil.write(output, SI_MIN_VERSION);
        if (si.getMinVersion() == null) {
            SimpleTextUtil.write(output, "null", scratch);
        } else {
            SimpleTextUtil.write(output, si.getMinVersion().toString(), scratch);
        }
        SimpleTextUtil.writeNewline(output);
        SimpleTextUtil.write(output, SI_DOCCOUNT);
        SimpleTextUtil.write(output, Integer.toString(si.maxDoc()), scratch);
        SimpleTextUtil.writeNewline(output);
        SimpleTextUtil.write(output, SI_USECOMPOUND);
        SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
        SimpleTextUtil.writeNewline(output);
        Map<String, String> diagnostics = si.getDiagnostics();
        int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
        SimpleTextUtil.write(output, SI_NUM_DIAG);
        SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
        SimpleTextUtil.writeNewline(output);
        if (numDiagnostics > 0) {
            for (Map.Entry<String, String> diagEntry : diagnostics.entrySet()) {
                SimpleTextUtil.write(output, SI_DIAG_KEY);
                SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
                SimpleTextUtil.writeNewline(output);
                SimpleTextUtil.write(output, SI_DIAG_VALUE);
                SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
                SimpleTextUtil.writeNewline(output);
            }
        }
        Map<String, String> attributes = si.getAttributes();
        SimpleTextUtil.write(output, SI_NUM_ATT);
        SimpleTextUtil.write(output, Integer.toString(attributes.size()), scratch);
        SimpleTextUtil.writeNewline(output);
        for (Map.Entry<String, String> attEntry : attributes.entrySet()) {
            SimpleTextUtil.write(output, SI_ATT_KEY);
            SimpleTextUtil.write(output, attEntry.getKey(), scratch);
            SimpleTextUtil.writeNewline(output);
            SimpleTextUtil.write(output, SI_ATT_VALUE);
            SimpleTextUtil.write(output, attEntry.getValue(), scratch);
            SimpleTextUtil.writeNewline(output);
        }
        Set<String> files = si.files();
        int numFiles = files == null ? 0 : files.size();
        SimpleTextUtil.write(output, SI_NUM_FILES);
        SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
        SimpleTextUtil.writeNewline(output);
        if (numFiles > 0) {
            for (String fileName : files) {
                SimpleTextUtil.write(output, SI_FILE);
                SimpleTextUtil.write(output, fileName, scratch);
                SimpleTextUtil.writeNewline(output);
            }
        }
        SimpleTextUtil.write(output, SI_ID);
        SimpleTextUtil.write(output, new BytesRef(si.getId()));
        SimpleTextUtil.writeNewline(output);
        Sort indexSort = si.getIndexSort();
        SimpleTextUtil.write(output, SI_SORT);
        final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
        SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch);
        SimpleTextUtil.writeNewline(output);
        for (int i = 0; i < numSortFields; ++i) {
            final SortField sortField = indexSort.getSort()[i];
            SimpleTextUtil.write(output, SI_SORT_FIELD);
            SimpleTextUtil.write(output, sortField.getField(), scratch);
            SimpleTextUtil.writeNewline(output);
            SimpleTextUtil.write(output, SI_SORT_TYPE);
            final String sortTypeString;
            final SortField.Type sortType;
            final boolean multiValued;
            if (sortField instanceof SortedSetSortField) {
                sortType = SortField.Type.STRING;
                multiValued = true;
            } else if (sortField instanceof SortedNumericSortField) {
                sortType = ((SortedNumericSortField) sortField).getNumericType();
                multiValued = true;
            } else {
                sortType = sortField.getType();
                multiValued = false;
            }
            switch(sortType) {
                case STRING:
                    if (multiValued) {
                        sortTypeString = "multi_valued_string";
                    } else {
                        sortTypeString = "string";
                    }
                    break;
                case LONG:
                    if (multiValued) {
                        sortTypeString = "multi_valued_long";
                    } else {
                        sortTypeString = "long";
                    }
                    break;
                case INT:
                    if (multiValued) {
                        sortTypeString = "multi_valued_int";
                    } else {
                        sortTypeString = "int";
                    }
                    break;
                case DOUBLE:
                    if (multiValued) {
                        sortTypeString = "multi_valued_double";
                    } else {
                        sortTypeString = "double";
                    }
                    break;
                case FLOAT:
                    if (multiValued) {
                        sortTypeString = "multi_valued_float";
                    } else {
                        sortTypeString = "float";
                    }
                    break;
                default:
                    throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
            }
            SimpleTextUtil.write(output, sortTypeString, scratch);
            SimpleTextUtil.writeNewline(output);
            if (sortField instanceof SortedSetSortField) {
                SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
                final String selectorString;
                if (selector == SortedSetSelector.Type.MIN) {
                    selectorString = "min";
                } else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
                    selectorString = "middle_min";
                } else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
                    selectorString = "middle_max";
                } else if (selector == SortedSetSelector.Type.MAX) {
                    selectorString = "max";
                } else {
                    throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
                }
                SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
                SimpleTextUtil.write(output, selectorString, scratch);
                SimpleTextUtil.writeNewline(output);
            } else if (sortField instanceof SortedNumericSortField) {
                SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
                final String selectorString;
                if (selector == SortedNumericSelector.Type.MIN) {
                    selectorString = "min";
                } else if (selector == SortedNumericSelector.Type.MAX) {
                    selectorString = "max";
                } else {
                    throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
                }
                SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
                SimpleTextUtil.write(output, selectorString, scratch);
                SimpleTextUtil.writeNewline(output);
            }
            SimpleTextUtil.write(output, SI_SORT_REVERSE);
            SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
            SimpleTextUtil.writeNewline(output);
            SimpleTextUtil.write(output, SI_SORT_MISSING);
            final Object missingValue = sortField.getMissingValue();
            final String missing;
            if (missingValue == null) {
                missing = "null";
            } else if (missingValue == SortField.STRING_FIRST) {
                missing = "first";
            } else if (missingValue == SortField.STRING_LAST) {
                missing = "last";
            } else {
                missing = missingValue.toString();
            }
            SimpleTextUtil.write(output, missing, scratch);
            SimpleTextUtil.writeNewline(output);
        }
        SimpleTextUtil.writeChecksum(output, scratch);
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) IndexOutput(org.apache.lucene.store.IndexOutput) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Sort(org.apache.lucene.search.Sort) SortedSetSelector(org.apache.lucene.search.SortedSetSelector) HashMap(java.util.HashMap) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Example 29 with SortField

use of org.apache.lucene.search.SortField in project lucene-solr by apache.

the class TestDemoParallelLeafReader method testNumericDVSort.

private static void testNumericDVSort(IndexSearcher s) throws IOException {
    // Confirm we can sort by the new DV field:
    TopDocs hits = s.search(new MatchAllDocsQuery(), 100, new Sort(new SortField("number", SortField.Type.LONG)));
    long last = Long.MIN_VALUE;
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        long value = Long.parseLong(s.doc(scoreDoc.doc).get("text").split(" ")[1]);
        assertTrue(value >= last);
        assertEquals(value, ((Long) ((FieldDoc) scoreDoc).fields[0]).longValue());
        last = value;
    }
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 30 with SortField

use of org.apache.lucene.search.SortField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testUpdateSegmentWithNoDocValues2.

@Test
public void testUpdateSegmentWithNoDocValues2() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    // prevent merges, otherwise by the time updates are applied
    // (writer.close()), the segments might have merged and that update becomes
    // legit.
    conf.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(dir, conf);
    // first segment with NDV
    Document doc = new Document();
    doc.add(new StringField("id", "doc0", Store.NO));
    doc.add(new NumericDocValuesField("ndv", 3));
    writer.addDocument(doc);
    doc = new Document();
    // document without 'ndv' field
    doc.add(new StringField("id", "doc4", Store.NO));
    writer.addDocument(doc);
    writer.commit();
    // second segment with no NDV, but another dv field "foo"
    doc = new Document();
    doc.add(new StringField("id", "doc1", Store.NO));
    doc.add(new NumericDocValuesField("foo", 3));
    writer.addDocument(doc);
    doc = new Document();
    // document that isn't updated
    doc.add(new StringField("id", "doc2", Store.NO));
    writer.addDocument(doc);
    writer.commit();
    // update document in the first segment - should not affect docsWithField of
    // the document without NDV field
    writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
    // update document in the second segment - field should be added and we should
    // be able to handle the other document correctly (e.g. no NPE)
    writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    for (LeafReaderContext context : reader.leaves()) {
        LeafReader r = context.reader();
        NumericDocValues ndv = r.getNumericDocValues("ndv");
        assertEquals(0, ndv.nextDoc());
        assertEquals(5L, ndv.longValue());
        assertTrue(ndv.nextDoc() > 1);
    }
    reader.close();
    TestUtil.checkIndex(dir);
    conf = newIndexWriterConfig(new MockAnalyzer(random()));
    writer = new IndexWriter(dir, conf);
    writer.forceMerge(1);
    writer.close();
    reader = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(reader);
    assertEquals(DocValuesType.NUMERIC, ar.getFieldInfos().fieldInfo("foo").getDocValuesType());
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs td;
    // doc0
    td = searcher.search(new TermQuery(new Term("id", "doc0")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
    assertEquals(5L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    // doc1
    td = searcher.search(new TermQuery(new Term("id", "doc1")), 1, new Sort(new SortField("ndv", SortField.Type.LONG), new SortField("foo", SortField.Type.LONG)));
    assertEquals(5L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    assertEquals(3L, ((FieldDoc) td.scoreDocs[0]).fields[1]);
    // doc2
    td = searcher.search(new TermQuery(new Term("id", "doc2")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
    assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    // doc4
    td = searcher.search(new TermQuery(new Term("id", "doc4")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
    assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Aggregations

SortField (org.apache.lucene.search.SortField)230 Sort (org.apache.lucene.search.Sort)174 Document (org.apache.lucene.document.Document)116 Directory (org.apache.lucene.store.Directory)110 IndexSearcher (org.apache.lucene.search.IndexSearcher)90 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)75 TopDocs (org.apache.lucene.search.TopDocs)74 IndexReader (org.apache.lucene.index.IndexReader)65 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)62 SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField)56 SortedSetSortField (org.apache.lucene.search.SortedSetSortField)56 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)49 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)37 TermQuery (org.apache.lucene.search.TermQuery)36 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)32 Query (org.apache.lucene.search.Query)29 Term (org.apache.lucene.index.Term)25 BytesRef (org.apache.lucene.util.BytesRef)25 TopFieldDocs (org.apache.lucene.search.TopFieldDocs)24 StoredField (org.apache.lucene.document.StoredField)23