Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class Test4GBStoredFields, method test.
@Nightly
public void test() throws Exception {
  assumeWorkingMMapOnWindows();
  MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
  dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  iwc.setRAMBufferSizeMB(256.0);
  iwc.setMergeScheduler(new ConcurrentMergeScheduler());
  iwc.setMergePolicy(newLogMergePolicy(false, 10));
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  // maybe we should factor out crazy cases to ExtremeCompressing? then annotations can handle this stuff...
  if (random().nextBoolean()) {
    iwc.setCodec(CompressingCodec.reasonableInstance(random()));
  }
  IndexWriter w = new IndexWriter(dir, iwc);
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }
  final Document doc = new Document();
  final FieldType ft = new FieldType();
  ft.setStored(true);
  ft.freeze();
  final int valueLength = RandomNumbers.randomIntBetween(random(), 1 << 13, 1 << 20);
  final byte[] value = new byte[valueLength];
  for (int i = 0; i < valueLength; ++i) {
    // random so that even compressing codecs can't compress it
    value[i] = (byte) random().nextInt(256);
  }
  final Field f = new Field("fld", value, ft);
  doc.add(f);
  final int numDocs = (int) ((1L << 32) / valueLength + 100);
  for (int i = 0; i < numDocs; ++i) {
    w.addDocument(doc);
    if (VERBOSE && i % (numDocs / 10) == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  if (VERBOSE) {
    boolean found = false;
    for (String file : dir.listAll()) {
      if (file.endsWith(".fdt")) {
        final long fileLength = dir.fileLength(file);
        if (fileLength >= 1L << 32) {
          found = true;
        }
        System.out.println("File length of " + file + " : " + fileLength);
      }
    }
    if (!found) {
      System.out.println("No .fdt file larger than 4GB, test bug?");
    }
  }
  DirectoryReader rd = DirectoryReader.open(dir);
  Document sd = rd.document(numDocs - 1);
  assertNotNull(sd);
  assertEquals(1, sd.getFields().size());
  BytesRef valueRef = sd.getBinaryValue("fld");
  assertNotNull(valueRef);
  assertEquals(new BytesRef(value), valueRef);
  rd.close();
  dir.close();
}
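The FieldType work in this test boils down to a stored-only, non-indexed binary field that is frozen once and reused for every document. Below is a minimal standalone sketch of just that pattern, assuming Lucene core on the classpath; the class name, the field name "blob" and the 16-byte value are illustrative, not taken from the test.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

public class StoredOnlyFieldSketch {
  public static void main(String[] args) {
    // stored but not indexed: a fresh FieldType defaults to IndexOptions.NONE
    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);
    storedOnly.freeze(); // no further changes once the type is shared across Field instances

    byte[] value = new byte[16]; // illustrative payload; the test uses 8 KiB to 1 MiB of random bytes
    Document doc = new Document();
    doc.add(new Field("blob", value, storedOnly));
    System.out.println(doc.getField("blob").binaryValue().length);
  }
}

Freezing the type before reusing it, as the test does, guards against accidental mutation after indexing has started.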
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class TestAddIndexes, method testAddIndexMissingCodec.
/*
 * Simple test that ensures we get the expected exceptions.
 */
public void testAddIndexMissingCodec() throws IOException {
  BaseDirectoryWrapper toAdd = newDirectory();
  // Disable checkIndex, else we get an exception because
  // of the unregistered codec:
  toAdd.setCheckIndexOnClose(false);
  {
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setCodec(new UnRegisteredCodec());
    IndexWriter w = new IndexWriter(toAdd, conf);
    Document doc = new Document();
    FieldType customType = new FieldType();
    customType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    doc.add(newField("foo", "bar", customType));
    w.addDocument(doc);
    w.close();
  }
  {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setCodec(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat()));
    IndexWriter w = new IndexWriter(dir, conf);
    expectThrows(IllegalArgumentException.class, () -> {
      w.addIndexes(toAdd);
    });
    w.close();
    IndexReader open = DirectoryReader.open(dir);
    assertEquals(0, open.numDocs());
    open.close();
    dir.close();
  }
  expectThrows(IllegalArgumentException.class, () -> {
    DirectoryReader.open(toAdd);
  });
  toAdd.close();
}
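Here the FieldType only exists to request postings with documents, frequencies and positions; nothing is stored. A minimal sketch of that configuration in isolation, assuming Lucene core on the classpath (the class name and printed output are illustrative; the field name and value mirror the test's "foo"/"bar"):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

public class PositionsOnlyFieldSketch {
  public static void main(String[] args) {
    FieldType positionsType = new FieldType();
    // index terms with document frequencies and positions; the value itself is not stored
    positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    positionsType.setTokenized(true); // analyze the value into terms (the FieldType default)
    positionsType.freeze();

    Document doc = new Document();
    doc.add(new Field("foo", "bar", positionsType));
    System.out.println(doc.getField("foo").fieldType().indexOptions());
  }
}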
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class Test2BPositions, method test.
public void test() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPositions"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setCodec(TestUtil.getDefaultCodec()));
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  Field field = new Field("field", new MyTokenStream(), ft);
  doc.add(field);
  final int numDocs = (Integer.MAX_VALUE / 26) + 1;
  for (int i = 0; i < numDocs; i++) {
    w.addDocument(doc);
    if (VERBOSE && i % 100000 == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  dir.close();
}
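The FieldType here relies on the copy constructor: it starts from TextField.TYPE_NOT_STORED and only switches off norms. A small sketch of that copy-and-tweak pattern with a plain string value instead of the test's custom MyTokenStream, assuming Lucene core on the classpath (class name and value are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

public class NoNormsTextFieldSketch {
  public static void main(String[] args) {
    // copy an existing field type, then adjust a single property
    FieldType noNorms = new FieldType(TextField.TYPE_NOT_STORED);
    noNorms.setOmitNorms(true); // skip the per-field length-normalization data
    noNorms.freeze();

    Document doc = new Document();
    doc.add(new Field("field", "a b c", noNorms));
    System.out.println(doc.getField("field").fieldType().omitNorms());
  }
}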
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class TestAddIndexes, method testHangOnClose.
// LUCENE-1270
public void testHangOnClose() throws IOException {
  Directory dir = newDirectory();
  LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
  lmp.setNoCFSRatio(0.0);
  lmp.setMergeFactor(100);
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(5)
      .setMergePolicy(lmp));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType));
  for (int i = 0; i < 60; i++) {
    writer.addDocument(doc);
  }
  Document doc2 = new Document();
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
  doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
  doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
  doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
  for (int i = 0; i < 10; i++) {
    writer.addDocument(doc2);
  }
  writer.close();
  Directory dir2 = newDirectory();
  lmp = new LogByteSizeMergePolicy();
  lmp.setMinMergeMB(0.0001);
  lmp.setNoCFSRatio(0.0);
  lmp.setMergeFactor(4);
  writer = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new SerialMergeScheduler())
      .setMergePolicy(lmp));
  writer.addIndexes(dir);
  writer.close();
  dir.close();
  dir2.close();
}
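Two FieldTypes are in play above: a stored text type with full term vectors (positions and offsets) for doc, and a bare stored-only type for doc2. A minimal sketch of the vector-enabled variant, assuming Lucene core on the classpath (class name and field value are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

public class TermVectorFieldSketch {
  public static void main(String[] args) {
    // stored, analyzed text plus per-document term vectors with positions and offsets
    FieldType withVectors = new FieldType(TextField.TYPE_STORED);
    withVectors.setStoreTermVectors(true);
    withVectors.setStoreTermVectorPositions(true);
    withVectors.setStoreTermVectorOffsets(true);
    withVectors.freeze();

    Document doc = new Document();
    doc.add(new Field("content", "aaa bbb ccc", withVectors));
    System.out.println(doc.getField("content").fieldType().storeTermVectors());
  }
}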
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class TestIndexWriterOnVMError, method doTest.
// just one thread, serial merge policy, hopefully debuggable
private void doTest(MockDirectoryWrapper.Failure failOn) throws Exception {
  // log all exceptions we hit, in case we fail (for debugging)
  ByteArrayOutputStream exceptionLog = new ByteArrayOutputStream();
  PrintStream exceptionStream = new PrintStream(exceptionLog, true, "UTF-8");
  //PrintStream exceptionStream = System.out;
  final long analyzerSeed = random().nextLong();
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // we are gonna make it angry
      tokenizer.setEnableChecks(false);
      TokenStream stream = tokenizer;
      // emit some payloads
      if (fieldName.contains("payloads")) {
        stream = new MockVariableLengthPayloadFilter(new Random(analyzerSeed), stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  MockDirectoryWrapper dir = null;
  final int numIterations = TEST_NIGHTLY ? atLeast(100) : atLeast(5);
  STARTOVER:
  for (int iter = 0; iter < numIterations; iter++) {
    try {
      // close from last run
      if (dir != null) {
        dir.close();
      }
      // disable slow things: we don't rely upon sleeps here.
      dir = newMockDirectory();
      dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
      dir.setUseSlowOpenClosers(false);
      IndexWriterConfig conf = newIndexWriterConfig(analyzer);
      // just for now, try to keep this test reproducible
      conf.setMergeScheduler(new SerialMergeScheduler());
      // test never makes it this far...
      int numDocs = atLeast(2000);
      IndexWriter iw = new IndexWriter(dir, conf);
      // ensure there is always a commit
      iw.commit();
      dir.failOn(failOn);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
        doc.add(new NumericDocValuesField("dv", i));
        doc.add(new BinaryDocValuesField("dv2", new BytesRef(Integer.toString(i))));
        doc.add(new SortedDocValuesField("dv3", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i - 1))));
        doc.add(new SortedNumericDocValuesField("dv5", i));
        doc.add(new SortedNumericDocValuesField("dv5", i - 1));
        doc.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
        // ensure we store something
        doc.add(new StoredField("stored1", "foo"));
        doc.add(new StoredField("stored1", "bar"));
        // ensure we get some payloads
        doc.add(newTextField("text_payloads", TestUtil.randomAnalysisString(random(), 6, true), Field.Store.NO));
        // ensure we get some vectors
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setStoreTermVectors(true);
        doc.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
        doc.add(new IntPoint("point", random().nextInt()));
        doc.add(new IntPoint("point2d", random().nextInt(), random().nextInt()));
        if (random().nextInt(10) > 0) {
          // single doc
          try {
            iw.addDocument(doc);
            // we made it, sometimes delete our doc, or update a dv
            int thingToDo = random().nextInt(4);
            if (thingToDo == 0) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)));
            } else if (thingToDo == 1) {
              iw.updateNumericDocValue(new Term("id", Integer.toString(i)), "dv", i + 1L);
            } else if (thingToDo == 2) {
              iw.updateBinaryDocValue(new Term("id", Integer.toString(i)), "dv2", new BytesRef(Integer.toString(i + 1)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        } else {
          // block docs
          Document doc2 = new Document();
          doc2.add(newStringField("id", Integer.toString(-i), Field.Store.NO));
          doc2.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
          doc2.add(new StoredField("stored1", "foo"));
          doc2.add(new StoredField("stored1", "bar"));
          doc2.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
          try {
            iw.addDocuments(Arrays.asList(doc, doc2));
            // we made it, sometimes delete our docs
            if (random().nextBoolean()) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)), new Term("id", Integer.toString(-i)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
        if (random().nextInt(10) == 0) {
          // trigger flush:
          try {
            if (random().nextBoolean()) {
              DirectoryReader ir = null;
              try {
                ir = DirectoryReader.open(iw, random().nextBoolean(), false);
                TestUtil.checkReader(ir);
              } finally {
                IOUtils.closeWhileHandlingException(ir);
              }
            } else {
              iw.commit();
            }
            if (DirectoryReader.indexExists(dir)) {
              TestUtil.checkIndex(dir);
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
      }
      try {
        iw.close();
      } catch (VirtualMachineError | AlreadyClosedException disaster) {
        getTragedy(disaster, iw, exceptionStream);
        continue STARTOVER;
      }
    } catch (Throwable t) {
      System.out.println("Unexpected exception: dumping fake-exception-log:...");
      exceptionStream.flush();
      System.out.println(exceptionLog.toString("UTF-8"));
      System.out.flush();
      Rethrow.rethrow(t);
    }
  }
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST PASSED: dumping fake-exception-log:...");
    System.out.println(exceptionLog.toString("UTF-8"));
  }
}
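The only FieldType usage in this test is the "text_vectors" field: an indexed-but-not-stored text type with term vectors switched on, rebuilt for every generated document. A rough sketch of that type actually being fed through an IndexWriter, assuming Lucene core plus an analyzer module on the classpath; the class name, StandardAnalyzer and RAMDirectory are illustrative substitutes for the test's MockAnalyzer and MockDirectoryWrapper.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

public class VectorsNotStoredSketch {
  public static void main(String[] args) throws Exception {
    // term vectors on an indexed-but-not-stored text field, like the test's "text_vectors"
    FieldType vectors = new FieldType(TextField.TYPE_NOT_STORED);
    vectors.setStoreTermVectors(true);

    try (RAMDirectory dir = new RAMDirectory();
         IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new Field("text_vectors", "some analyzed text", vectors));
      iw.addDocument(doc);
      iw.commit();
    }
  }
}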