use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestTermVectorsWriter method testEndOffsetPositionCharAnalyzer.
// LUCENE-1448
public void testEndOffsetPositionCharAnalyzer() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
Field f = newField("field", "abcd ", customType);
doc.add(f);
doc.add(f);
w.addDocument(doc);
w.close();
IndexReader r = DirectoryReader.open(dir);
TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
assertNotNull(termsEnum.next());
PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertEquals(2, termsEnum.totalTermFreq());
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(0, dpEnum.startOffset());
assertEquals(4, dpEnum.endOffset());
dpEnum.nextPosition();
assertEquals(8, dpEnum.startOffset());
assertEquals(12, dpEnum.endOffset());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
r.close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestTermVectorsWriter method testDoubleOffsetCounting.
// LUCENE-1442
public void testDoubleOffsetCounting() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
Field f = newField("field", "abcd", customType);
doc.add(f);
doc.add(f);
Field f2 = newField("field", "", customType);
doc.add(f2);
doc.add(f);
w.addDocument(doc);
w.close();
IndexReader r = DirectoryReader.open(dir);
Terms vector = r.getTermVectors(0).terms("field");
assertNotNull(vector);
TermsEnum termsEnum = vector.iterator();
assertNotNull(termsEnum.next());
assertEquals("", termsEnum.term().utf8ToString());
// Token "" occurred once
assertEquals(1, termsEnum.totalTermFreq());
PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(8, dpEnum.startOffset());
assertEquals(8, dpEnum.endOffset());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
// Token "abcd" occurred three times
assertEquals(new BytesRef("abcd"), termsEnum.next());
dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
assertEquals(3, termsEnum.totalTermFreq());
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(0, dpEnum.startOffset());
assertEquals(4, dpEnum.endOffset());
dpEnum.nextPosition();
assertEquals(4, dpEnum.startOffset());
assertEquals(8, dpEnum.endOffset());
dpEnum.nextPosition();
assertEquals(8, dpEnum.startOffset());
assertEquals(12, dpEnum.endOffset());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
assertNull(termsEnum.next());
r.close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestTermVectorsWriter method testTermVectorCorruption.
// LUCENE-1168
public void testTermVectorCorruption() throws IOException {
Directory dir = newDirectory();
for (int iter = 0; iter < 2; iter++) {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));
Document document = new Document();
FieldType customType = new FieldType();
customType.setStored(true);
Field storedField = newField("stored", "stored", customType);
document.add(storedField);
writer.addDocument(document);
writer.addDocument(document);
document = new Document();
document.add(storedField);
FieldType customType2 = new FieldType(StringField.TYPE_NOT_STORED);
customType2.setStoreTermVectors(true);
customType2.setStoreTermVectorPositions(true);
customType2.setStoreTermVectorOffsets(true);
Field termVectorField = newField("termVector", "termVector", customType2);
document.add(termVectorField);
writer.addDocument(document);
writer.forceMerge(1);
writer.close();
IndexReader reader = DirectoryReader.open(dir);
for (int i = 0; i < reader.numDocs(); i++) {
reader.document(i);
reader.getTermVectors(i);
}
reader.close();
writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(new LogDocMergePolicy()));
Directory[] indexDirs = { new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir)) };
writer.addIndexes(indexDirs);
writer.forceMerge(1);
writer.close();
}
dir.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestTermVectorsWriter method testEndOffsetPositionStandard.
// LUCENE-1448
public void testEndOffsetPositionStandard() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
Field f = newField("field", "abcd the ", customType);
Field f2 = newField("field", "crunch man", customType);
doc.add(f);
doc.add(f2);
w.addDocument(doc);
w.close();
IndexReader r = DirectoryReader.open(dir);
TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
assertNotNull(termsEnum.next());
PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(0, dpEnum.startOffset());
assertEquals(4, dpEnum.endOffset());
assertNotNull(termsEnum.next());
dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(11, dpEnum.startOffset());
assertEquals(17, dpEnum.endOffset());
assertNotNull(termsEnum.next());
dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
assertEquals(18, dpEnum.startOffset());
assertEquals(21, dpEnum.endOffset());
r.close();
dir.close();
}
use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
the class TestSloppyPhraseQuery method makeDocument.
private static Document makeDocument(String docText) {
Document doc = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setOmitNorms(true);
Field f = new Field("f", docText, customType);
doc.add(f);
return doc;
}
Aggregations