Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class SimpleFragmentsBuilderTest, the method makeUnstoredIndex:
protected void makeUnstoredIndex() throws Exception {
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE));
  Document doc = new Document();
  // Index the field without storing it, but keep full term vectors
  // (positions + offsets) so the highlighter has something to work with.
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorOffsets(true);
  customType.setStoreTermVectorPositions(true);
  doc.add(new Field(F, "aaa", customType));
  // Pre-4.0 equivalent:
  // doc.add(new Field(F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
  writer.addDocument(doc);
  writer.close();
  if (reader != null) {
    reader.close();
  }
  reader = DirectoryReader.open(dir);
}
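Term vectors with positions and offsets are what FastVectorHighlighter consumes at search time. The sketch below is not part of the test; the highlight method, reader, query, docId, and the fragment size of 100 are illustrative placeholders. It shows the typical call path; since makeUnstoredIndex() does not store the field, the fragments builder has no stored text to slice, which is presumably the case this test exercises.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;

// Hypothetical helper: highlight one hit of 'query' in 'field'.
String highlight(IndexReader reader, Query query, int docId, String field) throws IOException {
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  FieldQuery fieldQuery = highlighter.getFieldQuery(query);
  // Builds the fragment from the stored field value, guided by the
  // term-vector positions/offsets; returns null if no fragment can be made.
  return highlighter.getBestFragment(fieldQuery, reader, docId, field, 100);
}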
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestPostingsOffsets, the method checkTokens:
// TODO: more tests with other possibilities
private void checkTokens(Token[] field1, Token[] field2) throws IOException {
  Directory dir = newDirectory();
  // 'iwc' is an IndexWriterConfig field initialized in the test's setUp()
  RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
  boolean success = false;
  try {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(field1), ft));
    doc.add(new Field("body", new CannedTokenStream(field2), ft));
    riw.addDocument(doc);
    riw.close();
    success = true;
  } finally {
    if (success) {
      IOUtils.close(dir);
    } else {
      IOUtils.closeWhileHandlingException(riw, dir);
    }
  }
}
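Indexing with DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS means the offsets live in the postings themselves, so they can be read back without the term vectors (which are only stored here for the CheckIndex cross-check). A minimal sketch, assuming the Lucene 6/7-era MultiFields API (replaced by MultiTerms in 8.x); printOffsets and its term argument are illustrative:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

// Hypothetical helper: print every start/end offset of 'term' in "body".
void printOffsets(IndexReader reader, String term) throws IOException {
  Terms terms = MultiFields.getTerms(reader, "body");
  TermsEnum te = terms.iterator();
  if (te.seekExact(new BytesRef(term))) {
    // Ask the postings for offsets; no term vectors involved.
    PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
    while (pe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < pe.freq(); i++) {
        pe.nextPosition();
        System.out.println("doc=" + pe.docID() + " start=" + pe.startOffset() + " end=" + pe.endOffset());
      }
    }
  }
}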
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestIndexSorting, the method testRandom2:
public void testRandom2() throws Exception {
  int numDocs = atLeast(100);
  FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  POSITIONS_TYPE.freeze();
  FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
  TERM_VECTORS_TYPE.setStoreTermVectors(true);
  TERM_VECTORS_TYPE.freeze();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      return new TokenStreamComponents(tokenizer, tokenizer);
    }
  };
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    int id = i * 10;
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(id), Store.YES));
    doc.add(new StringField("docs", "#all#", Store.NO));
    PositionsTokenStream positions = new PositionsTokenStream();
    positions.setId(id);
    doc.add(new Field("positions", positions, POSITIONS_TYPE));
    doc.add(new NumericDocValuesField("numeric", id));
    String value = IntStream.range(0, id).mapToObj(k -> Integer.toString(id)).collect(Collectors.joining(" "));
    TextField norms = new TextField("norms", value, Store.NO);
    doc.add(norms);
    doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
    doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
    doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id))));
    doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
    doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
    doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
    doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
    byte[] bytes = new byte[4];
    NumericUtils.intToSortableBytes(id, bytes, 0);
    doc.add(new BinaryPoint("points", bytes));
    docs.add(doc);
  }
  // Must use the same seed for both RandomIndexWriters so they behave identically
  long seed = random().nextLong();
  // We add documents already in ID order for the first writer:
  Directory dir1 = newFSDirectory(createTempDir());
  Random random1 = new Random(seed);
  IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
  // for testing the norms field
  iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity()));
  // preserve docIDs
  iwc1.setMergePolicy(newLogMergePolicy());
  if (VERBOSE) {
    System.out.println("TEST: now index pre-sorted");
  }
  RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
  for (Document doc : docs) {
    ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
    w1.addDocument(doc);
  }
  // We shuffle documents, but set an index-time sort, for the second writer:
  Directory dir2 = newFSDirectory(createTempDir());
  Random random2 = new Random(seed);
  IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
  // for testing the norms field
  iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity()));
  Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
  iwc2.setIndexSort(sort);
  Collections.shuffle(docs, random());
  if (VERBOSE) {
    System.out.println("TEST: now index with index-time sorting");
  }
  RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
  int count = 0;
  int commitAtCount = TestUtil.nextInt(random(), 1, numDocs - 1);
  for (Document doc : docs) {
    ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
    if (count++ == commitAtCount) {
      // Ensure forceMerge really does merge
      w2.commit();
    }
    w2.addDocument(doc);
  }
  if (VERBOSE) {
    System.out.println("TEST: now force merge");
  }
  w2.forceMerge(1);
  DirectoryReader r1 = w1.getReader();
  DirectoryReader r2 = w2.getReader();
  if (VERBOSE) {
    System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
  }
  assertEquals(sort, getOnlyLeafReader(r2).getMetaData().getSort());
  assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
  IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
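Stripped of the test scaffolding, the feature under test is index-time sorting: IndexWriterConfig.setIndexSort makes Lucene keep every segment ordered by the given sort, which is why a pre-sorted index and a shuffled-then-sorted index come out equal. A minimal sketch of that setup, assuming Lucene 6.2+ (where setIndexSort was added); newSortedWriter and addDoc are hypothetical helpers:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;

// Hypothetical helper: a writer whose segments stay sorted by "numeric".
IndexWriter newSortedWriter(Directory dir) throws IOException {
  IndexWriterConfig cfg = new IndexWriterConfig(new StandardAnalyzer());
  cfg.setIndexSort(new Sort(new SortField("numeric", SortField.Type.INT)));
  return new IndexWriter(dir, cfg);
}

void addDoc(IndexWriter w, int id) throws IOException {
  Document doc = new Document();
  // Every document must supply doc values for the sort field.
  doc.add(new NumericDocValuesField("numeric", id));
  w.addDocument(doc);
}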
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestIndexWriter, the method testNoUnwantedTVFiles:
public void testNoUnwantedTVFiles() throws Exception {
  Directory dir = newDirectory();
  IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy()));
  indexWriter.getConfig().getMergePolicy().setNoCFSRatio(0.0);
  String BIG = "alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
  BIG = BIG + BIG + BIG + BIG;
  // None of these field types stores term vectors, so no tv* files should appear.
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setOmitNorms(true);
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setTokenized(false);
  FieldType customType3 = new FieldType(TextField.TYPE_STORED);
  customType3.setTokenized(false);
  customType3.setOmitNorms(true);
  for (int i = 0; i < 2; i++) {
    Document doc = new Document();
    doc.add(new Field("id", Integer.toString(i) + BIG, customType3));
    doc.add(new Field("str", Integer.toString(i) + BIG, customType2));
    // 'storedTextType' is a stored-text FieldType defined elsewhere in the test class
    doc.add(new Field("str2", Integer.toString(i) + BIG, storedTextType));
    doc.add(new Field("str3", Integer.toString(i) + BIG, customType));
    indexWriter.addDocument(doc);
  }
  indexWriter.close();
  TestUtil.checkIndex(dir);
  assertNoUnreferencedFiles(dir, "no tv files");
  DirectoryReader r0 = DirectoryReader.open(dir);
  for (LeafReaderContext ctx : r0.leaves()) {
    SegmentReader sr = (SegmentReader) ctx.reader();
    assertFalse(sr.getFieldInfos().hasVectors());
  }
  r0.close();
  dir.close();
}
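The same invariant can be spot-checked at the file level. A rough sketch, under two assumptions: the default codec writes term vectors to files with the .tvx/.tvd extensions (.tvf additionally in older codecs), and compound files are disabled, as this test does via setNoCFSRatio(0.0); assertNoTermVectorFiles is a hypothetical helper:

import java.io.IOException;
import org.apache.lucene.store.Directory;

// Hypothetical helper: fail if any term-vector file was written.
void assertNoTermVectorFiles(Directory dir) throws IOException {
  for (String file : dir.listAll()) {
    if (file.endsWith(".tvx") || file.endsWith(".tvd") || file.endsWith(".tvf")) {
      throw new AssertionError("unexpected term-vector file: " + file);
    }
  }
}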
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
From the class TestExceedMaxTermLength, the method test:
public void test() throws Exception {
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(random(), new MockAnalyzer(random())));
  try {
    final FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setStored(random().nextBoolean());
    ft.freeze();
    final Document doc = new Document();
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), TestUtil.randomSimpleString(random(), 1, 10), ft));
    }
    // problematic field: minTestTermLength/maxTestTermLegnth (identifier spelling
    // as in the original test) are class fields sized so the value exceeds
    // IndexWriter.MAX_TERM_LENGTH
    final String name = TestUtil.randomSimpleString(random(), 1, 50);
    final String value = TestUtil.randomSimpleString(random(), minTestTermLength, maxTestTermLegnth);
    final Field f = new Field(name, value, ft);
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10), TestUtil.randomSimpleString(random(), 1, 10), ft));
    }
    doc.add(f);
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
      w.addDocument(doc);
    });
    String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
    String msg = expected.getMessage();
    assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg, msg.contains("immense term"));
    assertTrue("IllegalArgumentException didn't mention max length (" + maxLengthMsg + "): " + msg, msg.contains(maxLengthMsg));
    assertTrue("IllegalArgumentException didn't mention field name (" + name + "): " + msg, msg.contains(name));
    assertTrue("IllegalArgumentException didn't mention original message: " + msg, msg.contains("bytes can be at most") && msg.contains("in length; got"));
  } finally {
    w.close();
  }
}
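In application code, the usual fix is to drop (or truncate) oversized tokens during analysis instead of letting IndexWriter reject the whole document. A hedged sketch using LengthFilter; note that IndexWriter.MAX_TERM_LENGTH counts UTF-8 bytes while LengthFilter counts Java chars, so dividing by 3 (the maximum UTF-8 bytes per char) keeps the bound conservative. The name lengthSafeAnalyzer is illustrative:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.index.IndexWriter;

// Sketch: silently drop tokens that could exceed IndexWriter.MAX_TERM_LENGTH
// (32766 UTF-8 bytes), so indexing never throws the "immense term" exception.
Analyzer lengthSafeAnalyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    TokenStream sink = new LengthFilter(source, 1, IndexWriter.MAX_TERM_LENGTH / 3);
    return new TokenStreamComponents(source, sink);
  }
};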