Use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache: the class TestRTGBase, method getFirstMatch.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Terms terms = MultiFields.getTerms(r, t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator();
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  // docs-only enum (no freqs/positions), filtered down to live (non-deleted) docs
  PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
  docs = BitsFilteredPostingsEnum.wrap(docs, MultiFields.getLiveDocs(r));
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    // the term is expected to be unique: exactly one live match
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
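For context, a minimal usage sketch: the dir variable and the indexed id:1 document are assumptions for illustration, not part of TestRTGBase.

// Hypothetical caller; 'dir' and an indexed "id:1" document are assumed.
IndexReader reader = DirectoryReader.open(dir);
try {
  int docId = getFirstMatch(reader, new Term("id", "1"));
  assertTrue("expected one live match for id:1", docId != -1);
} finally {
  reader.close();
}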
Use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache: the class TestPerfTasksLogic, method testReadTokens.
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {
  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;
  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String[] algLines1 = {
      "# ----- properties ",
      "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getReuters20LinesFile(),
      "# ----- alg ",
      "{ReadTokens}: " + NUM_DOCS,
      "ResetSystemErase",
      "CreateIndex",
      "{AddDoc}: " + NUM_DOCS,
      "CloseIndex"
  };
  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);
  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }
  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());
  int totalTokenCount2 = 0;
  Fields fields = MultiFields.getFields(reader);
  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD)
        || fieldName.equals(DocMaker.DATE_MSEC_FIELD)
        || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while (termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();
  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
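The index-side count above is a general pattern: sum freq() over every posting of every term. A self-contained sketch of just that pattern, with the helper name and its arguments as assumptions:

// Hypothetical helper: total number of token occurrences in one field,
// computed by summing freq() over all postings of all terms.
private static int countTokens(IndexReader reader, String field) throws IOException {
  Terms terms = MultiFields.getTerms(reader, field);
  if (terms == null) {
    return 0;
  }
  TermsEnum termsEnum = terms.iterator();
  PostingsEnum postings = null;
  int total = 0;
  while (termsEnum.next() != null) {
    postings = termsEnum.postings(postings, PostingsEnum.FREQS); // reuse the enum across terms
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      total += postings.freq();
    }
  }
  return total;
}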
Use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache: the class TestKeywordAnalyzer, method testMutipleDocument.
/*
public void testPerFieldAnalyzer() throws Exception {
  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
  analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
  QueryParser queryParser = new QueryParser("description", analyzer);
  Query query = queryParser.parse("partnum:Q36 AND SPACE");
  ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
  assertEquals("Q36 kept as-is",
      "+partnum:Q36 +space", query.toString("description"));
  assertEquals("doc found!", 1, hits.length);
}
*/
public void testMutipleDocument() throws Exception {
  RAMDirectory dir = new RAMDirectory();
  Analyzer analyzer = new KeywordAnalyzer();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
  Document doc = new Document();
  doc.add(new TextField("partnum", "Q36", Field.Store.YES));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new TextField("partnum", "Q37", Field.Store.YES));
  writer.addDocument(doc);
  writer.close();
  IndexReader reader = DirectoryReader.open(dir);
  PostingsEnum td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q36"), null, 0);
  assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q37"), null, 0);
  assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  reader.close();
  analyzer.close();
}
Use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache: the class TestTeeSinkTokenFilter, method testEndOffsetPositionWithTeeSinkTokenFilter.
// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  // "abcd" plus three trailing spaces (length 7): with the default offset gap
  // of 1, the second field instance below starts at offset 8
  TokenStream tokenStream = analyzer.tokenStream("field", "abcd   ");
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
  TokenStream sink = tee.newSinkTokenStream();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
  Field f1 = new Field("field", tee, ft);
  Field f2 = new Field("field", sink, ft);
  doc.add(f1);
  doc.add(f2);
  w.addDocument(doc);
  w.close();
  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertEquals(1, vector.size());
  TermsEnum termsEnum = vector.iterator();
  termsEnum.next();
  assertEquals(2, termsEnum.totalTermFreq());
  PostingsEnum positions = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, positions.freq());
  positions.nextPosition();
  assertEquals(0, positions.startOffset());
  assertEquals(4, positions.endOffset());
  positions.nextPosition();
  assertEquals(8, positions.startOffset());
  assertEquals(12, positions.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
  r.close();
  dir.close();
  analyzer.close();
}
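The assertions above rely on the general PostingsEnum contract: per matched document, nextPosition() may be called exactly freq() times, and startOffset()/endOffset() are valid after each call. A generic sketch of that loop (variable names are illustrative):

// Generic position/offset walk over one term's postings.
PostingsEnum pos = termsEnum.postings(null, PostingsEnum.ALL);
while (pos.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  for (int i = 0; i < pos.freq(); i++) { // nextPosition() is legal exactly freq() times per doc
    int position = pos.nextPosition();
    System.out.println(position + ": [" + pos.startOffset() + ", " + pos.endOffset() + ")");
  }
}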
Use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache: the class TestBlockPostingsFormat3, method assertTermsEnum.
/**
 * Checks the terms enum sequentially.
 * If deep is false, it does a 'shallow' test that doesn't go down to the docs enums.
 */
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception {
  BytesRef term;
  PostingsEnum leftPositions = null;
  PostingsEnum rightPositions = null;
  PostingsEnum leftDocs = null;
  PostingsEnum rightDocs = null;
  while ((term = leftTermsEnum.next()) != null) {
    assertEquals(term, rightTermsEnum.next());
    assertTermStats(leftTermsEnum, rightTermsEnum);
    if (deep) {
      if (hasPositions) {
        // with payloads + offsets
        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));
        assertPositionsSkipping(leftTermsEnum.docFreq(),
            leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL));
        // with payloads only
        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));
        assertPositionsSkipping(leftTermsEnum.docFreq(),
            leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS));
        // with offsets only
        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));
        assertPositionsSkipping(leftTermsEnum.docFreq(),
            leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS));
        // with positions only
        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));
        assertPositionsSkipping(leftTermsEnum.docFreq(),
            leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS),
            rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS));
      }
      // with freqs:
      assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs), rightDocs = rightTermsEnum.postings(rightDocs));
      // w/o freqs:
      assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
          rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));
      // with freqs:
      assertDocsSkipping(leftTermsEnum.docFreq(),
          leftDocs = leftTermsEnum.postings(leftDocs),
          rightDocs = rightTermsEnum.postings(rightDocs));
      // w/o freqs:
      assertDocsSkipping(leftTermsEnum.docFreq(),
          leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE),
          rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE));
    }
  }
  assertNull(rightTermsEnum.next());
}
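assertDocsEnum, assertDocsSkipping, and the positions variants are defined elsewhere in TestBlockPostingsFormat3. As a rough sketch, a docs-level comparison of this kind advances both enums in lockstep (an assumed shape, not the actual implementation):

// Assumed shape of a docs-level check: identical doc ids in identical order.
private static void checkSameDocs(PostingsEnum left, PostingsEnum right) throws IOException {
  int doc;
  while ((doc = left.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    assertEquals(doc, right.nextDoc());
  }
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, right.nextDoc());
}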