use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
the class TestAddIndexes method addDocsWithID.
// just like addDocs but with ID, starting from docStart
private void addDocsWithID(IndexWriter writer, int numDocs, int docStart) throws IOException {
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    doc.add(newTextField("id", "" + (docStart + i), Field.Store.YES));
    doc.add(new IntPoint("doc", i));
    doc.add(new IntPoint("doc2d", i, i));
    doc.add(new NumericDocValuesField("dv", i));
    writer.addDocument(doc);
  }
}
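A minimal sketch (not part of the original test) of how a helper like addDocsWithID can be driven and how the per-document "dv" values come back out; the directory and analyzer setup below is illustrative only, reusing the LuceneTestCase helpers the surrounding tests already rely on:

Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
addDocsWithID(writer, 10, 0); // ids "0" through "9"
writer.commit();
DirectoryReader reader = DirectoryReader.open(dir);
// iterate the NumericDocValues written by the helper; each doc stored its loop index i
NumericDocValues dv = MultiDocValues.getNumericValues(reader, "dv");
while (dv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  System.out.println("doc=" + dv.docID() + " dv=" + dv.longValue());
}
reader.close();
writer.close();
dir.close();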
use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
the class TestIndexWriterOnVMError method doTest.
// just one thread, serial merge policy, hopefully debuggable
private void doTest(MockDirectoryWrapper.Failure failOn) throws Exception {
  // log all exceptions we hit, in case we fail (for debugging)
  ByteArrayOutputStream exceptionLog = new ByteArrayOutputStream();
  PrintStream exceptionStream = new PrintStream(exceptionLog, true, "UTF-8");
  //PrintStream exceptionStream = System.out;
  final long analyzerSeed = random().nextLong();
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // we are gonna make it angry
      tokenizer.setEnableChecks(false);
      TokenStream stream = tokenizer;
      // emit some payloads
      if (fieldName.contains("payloads")) {
        stream = new MockVariableLengthPayloadFilter(new Random(analyzerSeed), stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  MockDirectoryWrapper dir = null;
  final int numIterations = TEST_NIGHTLY ? atLeast(100) : atLeast(5);
  STARTOVER: for (int iter = 0; iter < numIterations; iter++) {
    try {
      // close from last run
      if (dir != null) {
        dir.close();
      }
      // disable slow things: we don't rely upon sleeps here.
      dir = newMockDirectory();
      dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
      dir.setUseSlowOpenClosers(false);
      IndexWriterConfig conf = newIndexWriterConfig(analyzer);
      // just for now, try to keep this test reproducible
      conf.setMergeScheduler(new SerialMergeScheduler());
      // test never makes it this far...
      int numDocs = atLeast(2000);
      IndexWriter iw = new IndexWriter(dir, conf);
      // ensure there is always a commit
      iw.commit();
      dir.failOn(failOn);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
        doc.add(new NumericDocValuesField("dv", i));
        doc.add(new BinaryDocValuesField("dv2", new BytesRef(Integer.toString(i))));
        doc.add(new SortedDocValuesField("dv3", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i - 1))));
        doc.add(new SortedNumericDocValuesField("dv5", i));
        doc.add(new SortedNumericDocValuesField("dv5", i - 1));
        doc.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
        // ensure we store something
        doc.add(new StoredField("stored1", "foo"));
        doc.add(new StoredField("stored1", "bar"));
        // ensure we get some payloads
        doc.add(newTextField("text_payloads", TestUtil.randomAnalysisString(random(), 6, true), Field.Store.NO));
        // ensure we get some vectors
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setStoreTermVectors(true);
        doc.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
        doc.add(new IntPoint("point", random().nextInt()));
        doc.add(new IntPoint("point2d", random().nextInt(), random().nextInt()));
        if (random().nextInt(10) > 0) {
          // single doc
          try {
            iw.addDocument(doc);
            // we made it, sometimes delete our doc, or update a dv
            int thingToDo = random().nextInt(4);
            if (thingToDo == 0) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)));
            } else if (thingToDo == 1) {
              iw.updateNumericDocValue(new Term("id", Integer.toString(i)), "dv", i + 1L);
            } else if (thingToDo == 2) {
              iw.updateBinaryDocValue(new Term("id", Integer.toString(i)), "dv2", new BytesRef(Integer.toString(i + 1)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        } else {
          // block docs
          Document doc2 = new Document();
          doc2.add(newStringField("id", Integer.toString(-i), Field.Store.NO));
          doc2.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
          doc2.add(new StoredField("stored1", "foo"));
          doc2.add(new StoredField("stored1", "bar"));
          doc2.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
          try {
            iw.addDocuments(Arrays.asList(doc, doc2));
            // we made it, sometimes delete our docs
            if (random().nextBoolean()) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)), new Term("id", Integer.toString(-i)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
        if (random().nextInt(10) == 0) {
          // trigger flush:
          try {
            if (random().nextBoolean()) {
              DirectoryReader ir = null;
              try {
                ir = DirectoryReader.open(iw, random().nextBoolean(), false);
                TestUtil.checkReader(ir);
              } finally {
                IOUtils.closeWhileHandlingException(ir);
              }
            } else {
              iw.commit();
            }
            if (DirectoryReader.indexExists(dir)) {
              TestUtil.checkIndex(dir);
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
      }
      try {
        iw.close();
      } catch (VirtualMachineError | AlreadyClosedException disaster) {
        getTragedy(disaster, iw, exceptionStream);
        continue STARTOVER;
      }
    } catch (Throwable t) {
      System.out.println("Unexpected exception: dumping fake-exception-log:...");
      exceptionStream.flush();
      System.out.println(exceptionLog.toString("UTF-8"));
      System.out.flush();
      Rethrow.rethrow(t);
    }
  }
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST PASSED: dumping fake-exception-log:...");
    System.out.println(exceptionLog.toString("UTF-8"));
  }
}
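Callers of doTest pass in the fault to inject; a minimal sketch of such a MockDirectoryWrapper.Failure (the 1-in-3000 odds and the error message below are illustrative, not the original test's values):

doTest(new MockDirectoryWrapper.Failure() {
  @Override
  public void eval(MockDirectoryWrapper dir) throws IOException {
    // occasionally simulate a VM-level disaster while the writer is working
    if (random().nextInt(3000) == 0) {
      throw new OutOfMemoryError("Fake OutOfMemoryError");
    }
  }
});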
use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
the class TestIndexSorting method testMissingMultiValuedIntFirst.
public void testMissingMultiValuedIntFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT);
  sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
  Sort indexSort = new Sort(sortField);
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("id", 3));
  doc.add(new SortedNumericDocValuesField("foo", 18));
  doc.add(new SortedNumericDocValuesField("foo", 187667));
  w.addDocument(doc);
  // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
  w.commit();
  // missing
  doc = new Document();
  doc.add(new NumericDocValuesField("id", 1));
  w.addDocument(doc);
  w.commit();
  doc = new Document();
  doc.add(new NumericDocValuesField("id", 2));
  doc.add(new SortedNumericDocValuesField("foo", 7));
  doc.add(new SortedNumericDocValuesField("foo", 34));
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = DirectoryReader.open(w);
  LeafReader leaf = getOnlyLeafReader(r);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues values = leaf.getNumericDocValues("id");
  assertEquals(0, values.nextDoc());
  assertEquals(1, values.longValue());
  assertEquals(1, values.nextDoc());
  assertEquals(2, values.longValue());
  assertEquals(2, values.nextDoc());
  assertEquals(3, values.longValue());
  r.close();
  w.close();
  dir.close();
}
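A SortedNumericSortField compares each document by its minimum value by default, so id=2 (min 7) sorts before id=3 (min 18), and the document without "foo" lands first because of the Integer.MIN_VALUE missing value. A missing-last variant of the same setup only needs a different missing value (a sketch, assuming the same scaffolding as above):

SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT);
// Integer.MAX_VALUE pushes documents without a "foo" value to the end instead
sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
iwc.setIndexSort(new Sort(sortField));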
use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
the class TestIndexSorting method testMissingMultiValuedFloatFirst.
public void testMissingMultiValuedFloatFirst() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField sortField = new SortedNumericSortField("foo", SortField.Type.FLOAT);
  sortField.setMissingValue(Float.NEGATIVE_INFINITY);
  Sort indexSort = new Sort(sortField);
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("id", 3));
  doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
  doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(726.0f)));
  w.addDocument(doc);
  // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
  w.commit();
  // missing
  doc = new Document();
  doc.add(new NumericDocValuesField("id", 1));
  w.addDocument(doc);
  w.commit();
  doc = new Document();
  doc.add(new NumericDocValuesField("id", 2));
  doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
  doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
  w.addDocument(doc);
  w.forceMerge(1);
  DirectoryReader r = DirectoryReader.open(w);
  LeafReader leaf = getOnlyLeafReader(r);
  assertEquals(3, leaf.maxDoc());
  NumericDocValues values = leaf.getNumericDocValues("id");
  assertEquals(0, values.nextDoc());
  assertEquals(1, values.longValue());
  assertEquals(1, values.nextDoc());
  assertEquals(2, values.longValue());
  assertEquals(2, values.nextDoc());
  assertEquals(3, values.longValue());
  r.close();
  w.close();
  dir.close();
}
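SortedNumericDocValuesField only stores longs, which is why the float values above go through NumericUtils.floatToSortableInt: the encoding preserves float ordering in the integer domain. A round-trip sketch (the values are illustrative):

int encoded = NumericUtils.floatToSortableInt(18.0f);
// numeric order of encoded values matches float order, so index sorting works
float decoded = NumericUtils.sortableIntToFloat(encoded);
// decoded == 18.0f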
use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
the class TestIndexSorting method testRandom2.
public void testRandom2() throws Exception {
  int numDocs = atLeast(100);
  FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  POSITIONS_TYPE.freeze();
  FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  TERM_VECTORS_TYPE.setStoreTermVectors(true);
  TERM_VECTORS_TYPE.freeze();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    int id = i * 10;
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(id), Store.YES));
    doc.add(new StringField("docs", "#all#", Store.NO));
    PositionsTokenStream positions = new PositionsTokenStream();
    positions.setId(id);
    doc.add(new Field("positions", positions, POSITIONS_TYPE));
    doc.add(new NumericDocValuesField("numeric", id));
    String value = IntStream.range(0, id).mapToObj(k -> Integer.toString(id)).collect(Collectors.joining(" "));
    TextField norms = new TextField("norms", value, Store.NO);
    doc.add(norms);
    doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
    doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
    doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id))));
    doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
    doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
    doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
    doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
    byte[] bytes = new byte[4];
    NumericUtils.intToSortableBytes(id, bytes, 0);
    doc.add(new BinaryPoint("points", bytes));
    docs.add(doc);
  }
  // Must use the same seed for both RandomIndexWriters so they behave identically
  long seed = random().nextLong();
  // We add documents already in ID order for the first writer:
  Directory dir1 = newFSDirectory(createTempDir());
  Random random1 = new Random(seed);
  IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
  // for testing norms field
  iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity()));
  // preserve docIDs
  iwc1.setMergePolicy(newLogMergePolicy());
  if (VERBOSE) {
    System.out.println("TEST: now index pre-sorted");
  }
  RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
  for (Document doc : docs) {
    ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
    w1.addDocument(doc);
  }
  // We shuffle documents, but set index sort, for the second writer:
  Directory dir2 = newFSDirectory(createTempDir());
  Random random2 = new Random(seed);
  IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
  // for testing norms field
  iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity()));
  Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
  iwc2.setIndexSort(sort);
  Collections.shuffle(docs, random());
  if (VERBOSE) {
    System.out.println("TEST: now index with index-time sorting");
  }
  RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
  int count = 0;
  int commitAtCount = TestUtil.nextInt(random(), 1, numDocs - 1);
  for (Document doc : docs) {
    ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
    if (count++ == commitAtCount) {
      // Ensure forceMerge really does merge
      w2.commit();
    }
    w2.addDocument(doc);
  }
  if (VERBOSE) {
    System.out.println("TEST: now force merge");
  }
  w2.forceMerge(1);
  DirectoryReader r1 = w1.getReader();
  DirectoryReader r2 = w2.getReader();
  if (VERBOSE) {
    System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
  }
  assertEquals(sort, getOnlyLeafReader(r2).getMetaData().getSort());
  assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
  IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
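Outside the test framework, the same index-time sorting is configured on a plain IndexWriterConfig; a standalone sketch (the path and analyzer below are illustrative assumptions, not from the original test):

IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setIndexSort(new Sort(new SortField("numeric", SortField.Type.INT)));
IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("sorted-index")), config);
// each segment this writer flushes or merges stays sorted by "numeric";
// documents supply the sort key via a NumericDocValuesField
Document doc = new Document();
doc.add(new NumericDocValuesField("numeric", 42));
writer.addDocument(doc);
writer.close();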