Use of org.apache.lucene.document.TextField in project lucene-solr by apache: class TestMultiPhraseEnum, method testSomeDocuments.
/** Tests union on a few documents */
public void testSomeDocuments() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwc);

  // doc 0: "foo"; doc 1: empty; doc 2: "foo bar"; doc 3: "bar"
  Document doc = new Document();
  doc.add(new TextField("field", "foo", Field.Store.NO));
  writer.addDocument(doc);
  writer.addDocument(new Document());
  doc = new Document();
  doc.add(new TextField("field", "foo bar", Field.Store.NO));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new TextField("field", "bar", Field.Store.NO));
  writer.addDocument(doc);

  writer.forceMerge(1);
  DirectoryReader ir = DirectoryReader.open(writer);
  writer.close();

  PostingsEnum p1 = getOnlyLeafReader(ir).postings(new Term("field", "foo"), PostingsEnum.POSITIONS);
  PostingsEnum p2 = getOnlyLeafReader(ir).postings(new Term("field", "bar"), PostingsEnum.POSITIONS);
  PostingsEnum union = new MultiPhraseQuery.UnionPostingsEnum(Arrays.asList(p1, p2));

  // the union should match docs 0, 2 and 3, merging freqs and positions
  assertEquals(-1, union.docID());

  assertEquals(0, union.nextDoc());
  assertEquals(1, union.freq());
  assertEquals(0, union.nextPosition());

  assertEquals(2, union.nextDoc());
  assertEquals(2, union.freq());
  assertEquals(0, union.nextPosition());
  assertEquals(1, union.nextPosition());

  assertEquals(3, union.nextDoc());
  assertEquals(1, union.freq());
  assertEquals(0, union.nextPosition());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, union.nextDoc());

  ir.close();
  dir.close();
}
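For orientation, here is a minimal standalone sketch, not taken from the test suite, of how a TextField is indexed and its postings read back with the same Lucene-6-era APIs used above. The class name, analyzer choice, field values and printed output are illustrative assumptions.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class TextFieldPostingsSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // TextField tokenizes its value; Store.NO keeps only the inverted index entry.
      doc.add(new TextField("field", "foo bar", Field.Store.NO));
      writer.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      // Read positions for the term "bar" back out of the index.
      PostingsEnum postings = MultiFields.getTermPositionsEnum(reader, "field", new BytesRef("bar"));
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println("doc=" + postings.docID() + " freq=" + postings.freq()
            + " firstPos=" + postings.nextPosition());
      }
    }
    dir.close();
  }
}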
Use of org.apache.lucene.document.TextField in project lucene-solr by apache: class TestMultiPhraseQuery, method testZeroPosIncr.
public void testZeroPosIncr() throws IOException {
  Directory dir = new RAMDirectory();

  // Three tokens stacked at the same position: "a" at position 0, with "b" and
  // "c" placed on top of it via a zero position increment.
  final Token[] tokens = new Token[3];
  tokens[0] = new Token();
  tokens[0].append("a");
  tokens[0].setPositionIncrement(1);
  tokens[1] = new Token();
  tokens[1].append("b");
  tokens[1].setPositionIncrement(0);
  tokens[2] = new Token();
  tokens[2].append("c");
  tokens[2].setPositionIncrement(0);

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(new TextField("field", new CannedTokenStream(tokens)));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new TextField("field", new CannedTokenStream(tokens)));
  writer.addDocument(doc);
  IndexReader r = writer.getReader();
  writer.close();

  IndexSearcher s = newSearcher(r);
  MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
  // NOTE: the else clause yields different scores: MultiPhraseQuery counts the
  // phrase as occurring twice per doc, because MultipleTermPositions can return
  // the same position more than once (0, in this case):
  if (true) {
    mpqb.add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
    mpqb.add(new Term[] { new Term("field", "a") }, 0);
  } else {
    mpqb.add(new Term[] { new Term("field", "a") }, 0);
    mpqb.add(new Term[] { new Term("field", "b"), new Term("field", "c") }, 0);
  }

  TopDocs hits = s.search(mpqb.build(), 2);
  assertEquals(2, hits.totalHits);
  assertEquals(hits.scoreDocs[0].score, hits.scoreDocs[1].score, 1e-5);
  /*
  for(int hit=0;hit<hits.totalHits;hit++) {
    ScoreDoc sd = hits.scoreDocs[hit];
    System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
  }
  */
  r.close();
  dir.close();
}
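As a compact reference for the builder calls in the test above, here is a hedged sketch of how MultiPhraseQuery.Builder stacks alternative terms at a single phrase position. The class name, the field "body" and the terms are illustrative, not taken from the test.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;

public class MultiPhraseSketch {
  // Position 0 accepts either "app" or "apps"; position 1 must be "store".
  // Terms passed together in one add(Term[]) call share a single phrase position.
  static Query appStoreQuery() {
    MultiPhraseQuery.Builder builder = new MultiPhraseQuery.Builder();
    builder.add(new Term[] { new Term("body", "app"), new Term("body", "apps") });
    builder.add(new Term("body", "store"));
    return builder.build();
  }
}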
Use of org.apache.lucene.document.TextField in project lucene-solr by apache: class TestMultiPhraseQuery, method doTestZeroPosIncrSloppy.
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
  // random dir
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(null);
  IndexWriter writer = new IndexWriter(dir, cfg);

  Document doc = new Document();
  doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
  writer.addDocument(doc);

  IndexReader r = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher s = newSearcher(r);
  if (VERBOSE) {
    System.out.println("QUERY=" + q);
  }
  TopDocs hits = s.search(q, 1);
  assertEquals("wrong number of results", nExpected, hits.totalHits);
  if (VERBOSE) {
    for (int hit = 0; hit < hits.totalHits; hit++) {
      ScoreDoc sd = hits.scoreDocs[hit];
      System.out.println(" hit doc=" + sd.doc + " score=" + sd.score);
    }
  }
  r.close();
  dir.close();
}
Use of org.apache.lucene.document.TextField in project lucene-solr by apache: class TestCachingTokenFilter, method testCaching.
public void testCaching() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  AtomicInteger resetCount = new AtomicInteger(0);
  TokenStream stream = new TokenStream() {

    private int index = 0;
    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    @Override
    public void reset() throws IOException {
      super.reset();
      resetCount.incrementAndGet();
    }

    @Override
    public boolean incrementToken() {
      if (index == tokens.length) {
        return false;
      } else {
        clearAttributes();
        termAtt.append(tokens[index++]);
        offsetAtt.setOffset(0, 0);
        return true;
      }
    }
  };

  stream = new CachingTokenFilter(stream);
  doc.add(new TextField("preanalyzed", stream));

  // 1) we consume all tokens twice before we add the doc to the index
  assertFalse(((CachingTokenFilter) stream).isCached());
  stream.reset();
  assertFalse(((CachingTokenFilter) stream).isCached());
  checkTokens(stream);
  stream.reset();
  checkTokens(stream);
  assertTrue(((CachingTokenFilter) stream).isCached());

  // 2) now add the document to the index and verify if all tokens are indexed
  //    don't reset the stream here, the DocumentWriter should do that implicitly
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  PostingsEnum termPositions = MultiFields.getTermPositionsEnum(reader, "preanalyzed", new BytesRef("term1"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(0, termPositions.nextPosition());

  termPositions = MultiFields.getTermPositionsEnum(reader, "preanalyzed", new BytesRef("term2"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, termPositions.freq());
  assertEquals(1, termPositions.nextPosition());
  assertEquals(3, termPositions.nextPosition());

  termPositions = MultiFields.getTermPositionsEnum(reader, "preanalyzed", new BytesRef("term3"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(2, termPositions.nextPosition());

  reader.close();
  writer.close();

  // 3) reset stream and consume tokens again
  stream.reset();
  checkTokens(stream);

  assertEquals(1, resetCount.get());

  dir.close();
}
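For comparison, here is a minimal hedged sketch of wrapping an analyzer's output in CachingTokenFilter and handing it to a TextField as a pre-analyzed stream. The class name, analyzer and field name are illustrative and not part of the test above.

import java.io.IOException;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;

public class PreAnalyzedFieldSketch {
  // Wrap the analyzer's stream in CachingTokenFilter so the tokens can be
  // consumed once up front (e.g. for inspection) and replayed when the
  // IndexWriter later consumes the field.
  static Document buildDoc(StandardAnalyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("preanalyzed", text);
    CachingTokenFilter cached = new CachingTokenFilter(stream);
    Document doc = new Document();
    doc.add(new TextField("preanalyzed", cached));
    return doc;
  }
}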
Use of org.apache.lucene.document.TextField in project lucene-solr by apache: class TestConsistentFieldNumbers, method testAddIndexes.
@Test
public void testAddIndexes() throws Exception {
  Directory dir1 = newDirectory();
  Directory dir2 = newDirectory();
  IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

  Document d1 = new Document();
  d1.add(new TextField("f1", "first field", Field.Store.YES));
  d1.add(new TextField("f2", "second field", Field.Store.YES));
  writer.addDocument(d1);
  writer.close();

  writer = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
  Document d2 = new Document();
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setStoreTermVectors(true);
  d2.add(new TextField("f2", "second field", Field.Store.YES));
  d2.add(new Field("f1", "first field", customType2));
  d2.add(new TextField("f3", "third field", Field.Store.YES));
  d2.add(new TextField("f4", "fourth field", Field.Store.YES));
  writer.addDocument(d2);
  writer.close();

  writer = new IndexWriter(dir1, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
  writer.addIndexes(dir2);
  writer.close();

  SegmentInfos sis = SegmentInfos.readLatestCommit(dir1);
  assertEquals(2, sis.size());

  FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
  FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));

  assertEquals("f1", fis1.fieldInfo(0).name);
  assertEquals("f2", fis1.fieldInfo(1).name);
  // make sure the ordering of the "external" segment is preserved
  assertEquals("f2", fis2.fieldInfo(0).name);
  assertEquals("f1", fis2.fieldInfo(1).name);
  assertEquals("f3", fis2.fieldInfo(2).name);
  assertEquals("f4", fis2.fieldInfo(3).name);

  dir1.close();
  dir2.close();
}
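For context, a minimal hedged sketch of the addIndexes call exercised above, stripped of the field-numbering assertions. The class name, method signature and analyzer choice are illustrative assumptions.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

public class AddIndexesSketch {
  // Copy all segments of the source index into the destination index.
  // As the test above asserts, each copied segment keeps its own FieldInfos
  // ordering rather than being renumbered to match the destination.
  static void merge(Directory dest, Directory source) throws IOException {
    try (IndexWriter writer = new IndexWriter(dest, new IndexWriterConfig(new StandardAnalyzer()))) {
      writer.addIndexes(source);
    }
  }
}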