use of org.apache.lucene.search.SortField in project lucene-solr by apache.
the class TestBackwardsCompatibility method testCreateSortedIndex.
// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
public void testCreateSortedIndex() throws Exception {
Path indexDir = getIndexDir().resolve("sorted");
Files.deleteIfExists(indexDir);
Directory dir = newFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
conf.setMergePolicy(mp);
conf.setUseCompoundFile(false);
conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
IndexWriter writer = new IndexWriter(dir, conf);
LineFileDocs docs = new LineFileDocs(random());
SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
parser.setTimeZone(TimeZone.getTimeZone("UTC"));
ParsePosition position = new ParsePosition(0);
Field dateDVField = null;
for (int i = 0; i < 50; i++) {
Document doc = docs.nextDoc();
String dateString = doc.get("date");
position.setIndex(0);
Date date = parser.parse(dateString, position);
if (position.getErrorIndex() != -1) {
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
}
if (position.getIndex() != dateString.length()) {
throw new AssertionError("failed to parse \"" + dateString + "\" as date");
}
if (dateDVField == null) {
dateDVField = new NumericDocValuesField("dateDV", 0l);
doc.add(dateDVField);
}
dateDVField.setLongValue(date.getTime());
if (i == 250) {
writer.commit();
}
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.close();
dir.close();
}
use of org.apache.lucene.search.SortField in project lucene-solr by apache.
the class DefaultIndexingChain method maybeSortSegment.
private Sorter.DocMap maybeSortSegment(SegmentWriteState state) throws IOException {
Sort indexSort = state.segmentInfo.getIndexSort();
if (indexSort == null) {
return null;
}
List<Sorter.DocComparator> comparators = new ArrayList<>();
for (int i = 0; i < indexSort.getSort().length; i++) {
SortField sortField = indexSort.getSort()[i];
PerField perField = getPerField(sortField.getField());
if (perField != null && perField.docValuesWriter != null && finishedDocValues.contains(perField.fieldInfo.name) == false) {
perField.docValuesWriter.finish(state.segmentInfo.maxDoc());
Sorter.DocComparator cmp = perField.docValuesWriter.getDocComparator(state.segmentInfo.maxDoc(), sortField);
comparators.add(cmp);
finishedDocValues.add(perField.fieldInfo.name);
} else {
// safe to ignore, sort field with no values or already seen before
}
}
Sorter sorter = new Sorter(indexSort);
// returns null if the documents are already sorted
return sorter.sort(state.segmentInfo.maxDoc(), comparators.toArray(new Sorter.DocComparator[comparators.size()]));
}
use of org.apache.lucene.search.SortField in project lucene-solr by apache.
the class SimpleTextSegmentInfoFormat method write.
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
try (IndexOutput output = dir.createOutput(segFileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(segFileName);
BytesRefBuilder scratch = new BytesRefBuilder();
SimpleTextUtil.write(output, SI_VERSION);
SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_MIN_VERSION);
if (si.getMinVersion() == null) {
SimpleTextUtil.write(output, "null", scratch);
} else {
SimpleTextUtil.write(output, si.getMinVersion().toString(), scratch);
}
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DOCCOUNT);
SimpleTextUtil.write(output, Integer.toString(si.maxDoc()), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_USECOMPOUND);
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
SimpleTextUtil.writeNewline(output);
Map<String, String> diagnostics = si.getDiagnostics();
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
SimpleTextUtil.write(output, SI_NUM_DIAG);
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
SimpleTextUtil.writeNewline(output);
if (numDiagnostics > 0) {
for (Map.Entry<String, String> diagEntry : diagnostics.entrySet()) {
SimpleTextUtil.write(output, SI_DIAG_KEY);
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DIAG_VALUE);
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Map<String, String> attributes = si.getAttributes();
SimpleTextUtil.write(output, SI_NUM_ATT);
SimpleTextUtil.write(output, Integer.toString(attributes.size()), scratch);
SimpleTextUtil.writeNewline(output);
for (Map.Entry<String, String> attEntry : attributes.entrySet()) {
SimpleTextUtil.write(output, SI_ATT_KEY);
SimpleTextUtil.write(output, attEntry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_ATT_VALUE);
SimpleTextUtil.write(output, attEntry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
SimpleTextUtil.writeNewline(output);
if (numFiles > 0) {
for (String fileName : files) {
SimpleTextUtil.write(output, SI_FILE);
SimpleTextUtil.write(output, fileName, scratch);
SimpleTextUtil.writeNewline(output);
}
}
SimpleTextUtil.write(output, SI_ID);
SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
Sort indexSort = si.getIndexSort();
SimpleTextUtil.write(output, SI_SORT);
final int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
SimpleTextUtil.write(output, Integer.toString(numSortFields), scratch);
SimpleTextUtil.writeNewline(output);
for (int i = 0; i < numSortFields; ++i) {
final SortField sortField = indexSort.getSort()[i];
SimpleTextUtil.write(output, SI_SORT_FIELD);
SimpleTextUtil.write(output, sortField.getField(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_SORT_TYPE);
final String sortTypeString;
final SortField.Type sortType;
final boolean multiValued;
if (sortField instanceof SortedSetSortField) {
sortType = SortField.Type.STRING;
multiValued = true;
} else if (sortField instanceof SortedNumericSortField) {
sortType = ((SortedNumericSortField) sortField).getNumericType();
multiValued = true;
} else {
sortType = sortField.getType();
multiValued = false;
}
switch(sortType) {
case STRING:
if (multiValued) {
sortTypeString = "multi_valued_string";
} else {
sortTypeString = "string";
}
break;
case LONG:
if (multiValued) {
sortTypeString = "multi_valued_long";
} else {
sortTypeString = "long";
}
break;
case INT:
if (multiValued) {
sortTypeString = "multi_valued_int";
} else {
sortTypeString = "int";
}
break;
case DOUBLE:
if (multiValued) {
sortTypeString = "multi_valued_double";
} else {
sortTypeString = "double";
}
break;
case FLOAT:
if (multiValued) {
sortTypeString = "multi_valued_float";
} else {
sortTypeString = "float";
}
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
SimpleTextUtil.write(output, sortTypeString, scratch);
SimpleTextUtil.writeNewline(output);
if (sortField instanceof SortedSetSortField) {
SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
final String selectorString;
if (selector == SortedSetSelector.Type.MIN) {
selectorString = "min";
} else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
selectorString = "middle_min";
} else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
selectorString = "middle_max";
} else if (selector == SortedSetSelector.Type.MAX) {
selectorString = "max";
} else {
throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
}
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
SimpleTextUtil.write(output, selectorString, scratch);
SimpleTextUtil.writeNewline(output);
} else if (sortField instanceof SortedNumericSortField) {
SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
final String selectorString;
if (selector == SortedNumericSelector.Type.MIN) {
selectorString = "min";
} else if (selector == SortedNumericSelector.Type.MAX) {
selectorString = "max";
} else {
throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
}
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
SimpleTextUtil.write(output, selectorString, scratch);
SimpleTextUtil.writeNewline(output);
}
SimpleTextUtil.write(output, SI_SORT_REVERSE);
SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_SORT_MISSING);
final Object missingValue = sortField.getMissingValue();
final String missing;
if (missingValue == null) {
missing = "null";
} else if (missingValue == SortField.STRING_FIRST) {
missing = "first";
} else if (missingValue == SortField.STRING_LAST) {
missing = "last";
} else {
missing = missingValue.toString();
}
SimpleTextUtil.write(output, missing, scratch);
SimpleTextUtil.writeNewline(output);
}
SimpleTextUtil.writeChecksum(output, scratch);
}
}
use of org.apache.lucene.search.SortField in project lucene-solr by apache.
the class TestDemoParallelLeafReader method testNumericDVSort.
private static void testNumericDVSort(IndexSearcher s) throws IOException {
// Confirm we can sort by the new DV field:
TopDocs hits = s.search(new MatchAllDocsQuery(), 100, new Sort(new SortField("number", SortField.Type.LONG)));
long last = Long.MIN_VALUE;
for (ScoreDoc scoreDoc : hits.scoreDocs) {
long value = Long.parseLong(s.doc(scoreDoc.doc).get("text").split(" ")[1]);
assertTrue(value >= last);
assertEquals(value, ((Long) ((FieldDoc) scoreDoc).fields[0]).longValue());
last = value;
}
}
use of org.apache.lucene.search.SortField in project lucene-solr by apache.
the class TestNumericDocValuesUpdates method testUpdateSegmentWithNoDocValues2.
@Test
public void testUpdateSegmentWithNoDocValues2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// prevent merges, otherwise by the time updates are applied
// (writer.close()), the segments might have merged and that update becomes
// legit.
conf.setMergePolicy(NoMergePolicy.INSTANCE);
IndexWriter writer = new IndexWriter(dir, conf);
// first segment with NDV
Document doc = new Document();
doc.add(new StringField("id", "doc0", Store.NO));
doc.add(new NumericDocValuesField("ndv", 3));
writer.addDocument(doc);
doc = new Document();
// document without 'ndv' field
doc.add(new StringField("id", "doc4", Store.NO));
writer.addDocument(doc);
writer.commit();
// second segment with no NDV, but another dv field "foo"
doc = new Document();
doc.add(new StringField("id", "doc1", Store.NO));
doc.add(new NumericDocValuesField("foo", 3));
writer.addDocument(doc);
doc = new Document();
// document that isn't updated
doc.add(new StringField("id", "doc2", Store.NO));
writer.addDocument(doc);
writer.commit();
// update document in the first segment - should not affect docsWithField of
// the document without NDV field
writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
// update document in the second segment - field should be added and we should
// be able to handle the other document correctly (e.g. no NPE)
writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
for (LeafReaderContext context : reader.leaves()) {
LeafReader r = context.reader();
NumericDocValues ndv = r.getNumericDocValues("ndv");
assertEquals(0, ndv.nextDoc());
assertEquals(5L, ndv.longValue());
assertTrue(ndv.nextDoc() > 1);
}
reader.close();
TestUtil.checkIndex(dir);
conf = newIndexWriterConfig(new MockAnalyzer(random()));
writer = new IndexWriter(dir, conf);
writer.forceMerge(1);
writer.close();
reader = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(reader);
assertEquals(DocValuesType.NUMERIC, ar.getFieldInfos().fieldInfo("foo").getDocValuesType());
IndexSearcher searcher = new IndexSearcher(reader);
TopFieldDocs td;
// doc0
td = searcher.search(new TermQuery(new Term("id", "doc0")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(5L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
// doc1
td = searcher.search(new TermQuery(new Term("id", "doc1")), 1, new Sort(new SortField("ndv", SortField.Type.LONG), new SortField("foo", SortField.Type.LONG)));
assertEquals(5L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
assertEquals(3L, ((FieldDoc) td.scoreDocs[0]).fields[1]);
// doc2
td = searcher.search(new TermQuery(new Term("id", "doc2")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
// doc4
td = searcher.search(new TermQuery(new Term("id", "doc4")), 1, new Sort(new SortField("ndv", SortField.Type.LONG)));
assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
reader.close();
dir.close();
}
Aggregations