Search in sources :

Example 36 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestRTGBase method getFirstMatch.

/**
 * Looks up the first document in {@code r} that contains term {@code t},
 * after wrapping the postings with the reader's live-docs bits.
 *
 * <p>When a document is found, this also asserts that the postings contain
 * no second document for the term.
 *
 * @param r reader to search
 * @param t term to look up
 * @return the matching doc id, or {@code -1} when the field has no terms,
 *         the term is absent, or the wrapped postings are empty
 * @throws IOException if the underlying index access fails
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    final Terms fieldTerms = MultiFields.getTerms(r, t.field());
    if (fieldTerms == null) {
        return -1;
    }

    final BytesRef wanted = t.bytes();
    final TermsEnum cursor = fieldTerms.iterator();
    if (!cursor.seekExact(wanted)) {
        return -1;
    }

    // Doc ids only (no freqs/positions), restricted by the live-docs bits.
    final PostingsEnum matches = BitsFilteredPostingsEnum.wrap(
        cursor.postings(null, PostingsEnum.NONE), MultiFields.getLiveDocs(r));

    final int firstDoc = matches.nextDoc();
    if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
        return -1;
    }

    // The term is expected to match at most one document.
    final int secondDoc = matches.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, secondDoc);
    return firstDoc;
}
Also used : Terms(org.apache.lucene.index.Terms) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 37 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestPerfTasksLogic method testReadTokens.

/**
 * Test ReadTokensTask.
 *
 * <p>Runs a benchmark algorithm that applies ReadTokens to the first
 * {@code NUM_DOCS} documents and then indexes the same documents, and
 * checks that the token count reported by the ReadTokens task statistics
 * equals the sum of term frequencies found in the resulting index.
 *
 * @throws Exception if the benchmark run or index access fails
 */
public void testReadTokens() throws Exception {
    // We will call ReadTokens on this many docs
    final int NUM_DOCS = 20;
    // Read tokens from first NUM_DOCS docs from Reuters and
    // then build index from the same docs
    String[] algLines1 = {
        "# ----- properties ",
        "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
        "docs.file=" + getReuters20LinesFile(),
        "# ----- alg ",
        "{ReadTokens}: " + NUM_DOCS,
        "ResetSystemErase",
        "CreateIndex",
        "{AddDoc}: " + NUM_DOCS,
        "CloseIndex"
    };
    // Run algo
    Benchmark benchmark = execBenchmark(algLines1);
    List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();
    // Count how many tokens all ReadTokens saw
    int totalTokenCount1 = 0;
    for (final TaskStats stat : stats) {
        if (stat.getTask().getName().equals("ReadTokens")) {
            totalTokenCount1 += stat.getCount();
        }
    }
    // Separately count how many tokens are actually in the index.
    // try-with-resources guarantees the reader is released even when an
    // assertion or index access inside the block fails.
    int totalTokenCount2 = 0;
    try (IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory())) {
        assertEquals(NUM_DOCS, reader.numDocs());
        Fields fields = MultiFields.getFields(reader);
        for (String fieldName : fields) {
            // The id/date/time fields are excluded from the count
            // (presumably because ReadTokens does not count them; confirm against ReadTokensTask).
            if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
                continue;
            }
            Terms terms = fields.terms(fieldName);
            if (terms == null) {
                continue;
            }
            TermsEnum termsEnum = terms.iterator();
            PostingsEnum docs = null;
            while (termsEnum.next() != null) {
                // Pass the previous enum back in so it can be reused.
                docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
                while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    totalTokenCount2 += docs.freq();
                }
            }
        }
    }
    // Make sure they are the same
    assertEquals(totalTokenCount1, totalTokenCount2);
}
Also used : Fields(org.apache.lucene.index.Fields) MultiFields(org.apache.lucene.index.MultiFields) IndexReader(org.apache.lucene.index.IndexReader) Terms(org.apache.lucene.index.Terms) TaskStats(org.apache.lucene.benchmark.byTask.stats.TaskStats) PostingsEnum(org.apache.lucene.index.PostingsEnum) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 38 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestKeywordAnalyzer method testMutipleDocument.

/*
  public void testPerFieldAnalyzer() throws Exception {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
    analyzer.addAnalyzer("partnum", new KeywordAnalyzer());

    QueryParser queryParser = new QueryParser("description", analyzer);
    Query query = queryParser.parse("partnum:Q36 AND SPACE");

    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("Q36 kept as-is",
              "+partnum:Q36 +space", query.toString("description"));
    assertEquals("doc found!", 1, hits.length);
  }
  */
/**
 * Indexes two documents whose "partnum" field holds "Q36" and "Q37"
 * respectively, using {@link KeywordAnalyzer}, and verifies that each
 * value can be found as a term with at least one posting.
 *
 * <p>The directory, analyzer, writer and reader are all managed with
 * try-with-resources so they are released even when an assertion fails.
 *
 * @throws Exception if indexing or reading fails
 */
public void testMutipleDocument() throws Exception {
    try (RAMDirectory dir = new RAMDirectory();
         Analyzer analyzer = new KeywordAnalyzer()) {
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField("partnum", "Q36", Field.Store.YES));
            writer.addDocument(doc);
            doc = new Document();
            doc.add(new TextField("partnum", "Q37", Field.Store.YES));
            writer.addDocument(doc);
        }
        // The writer is closed above, before the reader is opened.
        try (IndexReader reader = DirectoryReader.open(dir)) {
            // PostingsEnum.NONE: only doc ids are needed here.
            PostingsEnum td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q36"), null, PostingsEnum.NONE);
            assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            td = TestUtil.docs(random(), reader, "partnum", new BytesRef("Q37"), null, PostingsEnum.NONE);
            assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        }
    }
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) PostingsEnum(org.apache.lucene.index.PostingsEnum) RAMDirectory(org.apache.lucene.store.RAMDirectory) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 39 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestTeeSinkTokenFilter method testEndOffsetPositionWithTeeSinkTokenFilter.

// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
/**
 * Indexes the same field twice in one document, once from a
 * {@link TeeSinkTokenFilter} and once from its sink stream, with term
 * vectors (positions and offsets) enabled. It then checks the offsets
 * recorded in the term vector for both occurrences of the single term.
 *
 * @throws Exception if indexing or reading fails
 */
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    Directory directory = newDirectory();
    Analyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(analyzerOrSelf(mockAnalyzer)));

    // Unstored text field type that records term vectors with offsets and positions.
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);

    // Source stream goes through the tee; the sink stream feeds the second field instance.
    TokenStream source = mockAnalyzer.tokenStream("field", "abcd   ");
    TeeSinkTokenFilter teeFilter = new TeeSinkTokenFilter(source);
    TokenStream sinkStream = teeFilter.newSinkTokenStream();

    Document document = new Document();
    document.add(new Field("field", teeFilter, vectorType));
    document.add(new Field("field", sinkStream, vectorType));
    writer.addDocument(document);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    Terms termVector = reader.getTermVectors(0).terms("field");
    assertEquals(1, termVector.size());

    TermsEnum vectorTerms = termVector.iterator();
    vectorTerms.next();
    assertEquals(2, vectorTerms.totalTermFreq());

    PostingsEnum occurrences = vectorTerms.postings(null, PostingsEnum.ALL);
    assertTrue(occurrences.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, occurrences.freq());

    // First occurrence (from the tee).
    occurrences.nextPosition();
    assertEquals(0, occurrences.startOffset());
    assertEquals(4, occurrences.endOffset());

    // Second occurrence (from the sink).
    occurrences.nextPosition();
    assertEquals(8, occurrences.startOffset());
    assertEquals(12, occurrences.endOffset());

    assertEquals(DocIdSetIterator.NO_MORE_DOCS, occurrences.nextDoc());

    reader.close();
    directory.close();
    mockAnalyzer.close();
}

/** Identity helper so the writer-config line reads as a distinct step; returns its argument unchanged. */
private static Analyzer analyzerOrSelf(Analyzer analyzer) {
    return analyzer;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Terms(org.apache.lucene.index.Terms) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TermsEnum(org.apache.lucene.index.TermsEnum) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) PostingsEnum(org.apache.lucene.index.PostingsEnum) Directory(org.apache.lucene.store.Directory)

Example 40 with PostingsEnum

use of org.apache.lucene.index.PostingsEnum in project lucene-solr by apache.

the class TestBlockPostingsFormat3 method assertTermsEnum.

/**
 * Steps through both terms enums in lockstep and asserts that they expose
 * the same terms and term statistics.
 *
 * <p>When {@code deep} is false only this shallow comparison is done and
 * no postings enums are pulled. When {@code deep} is true the postings are
 * also compared, both sequentially and with skipping: with and without
 * freqs, and (if {@code hasPositions}) at every positions detail level.
 *
 * @param leftTermsEnum  first enum; drives the iteration
 * @param rightTermsEnum second enum; must yield the same terms and then be exhausted
 * @param deep           whether to also compare postings
 * @param hasPositions   whether to compare position-level postings
 */
public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception {
    // Checked in this order: payloads + offsets, payloads only, offsets only, positions only.
    final int[] positionFlagSets = { PostingsEnum.ALL, PostingsEnum.PAYLOADS, PostingsEnum.OFFSETS, PostingsEnum.POSITIONS };

    // Reused across terms and flag sets; each postings() call receives the previous enum.
    PostingsEnum lhsPositions = null;
    PostingsEnum rhsPositions = null;
    PostingsEnum lhsDocs = null;
    PostingsEnum rhsDocs = null;

    for (BytesRef current = leftTermsEnum.next(); current != null; current = leftTermsEnum.next()) {
        assertEquals(current, rightTermsEnum.next());
        assertTermStats(leftTermsEnum, rightTermsEnum);
        if (!deep) {
            continue;
        }

        if (hasPositions) {
            for (int flags : positionFlagSets) {
                // Sequential comparison.
                lhsPositions = leftTermsEnum.postings(lhsPositions, flags);
                rhsPositions = rightTermsEnum.postings(rhsPositions, flags);
                assertDocsAndPositionsEnum(lhsPositions, rhsPositions);

                // Skipping comparison, on freshly pulled enums.
                final int positionsDocFreq = leftTermsEnum.docFreq();
                lhsPositions = leftTermsEnum.postings(lhsPositions, flags);
                rhsPositions = rightTermsEnum.postings(rhsPositions, flags);
                assertPositionsSkipping(positionsDocFreq, lhsPositions, rhsPositions);
            }
        }

        // with freqs:
        lhsDocs = leftTermsEnum.postings(lhsDocs);
        rhsDocs = rightTermsEnum.postings(rhsDocs);
        assertDocsEnum(lhsDocs, rhsDocs);

        // w/o freqs:
        lhsDocs = leftTermsEnum.postings(lhsDocs, PostingsEnum.NONE);
        rhsDocs = rightTermsEnum.postings(rhsDocs, PostingsEnum.NONE);
        assertDocsEnum(lhsDocs, rhsDocs);

        // with freqs, skipping:
        final int freqsDocFreq = leftTermsEnum.docFreq();
        lhsDocs = leftTermsEnum.postings(lhsDocs);
        rhsDocs = rightTermsEnum.postings(rhsDocs);
        assertDocsSkipping(freqsDocFreq, lhsDocs, rhsDocs);

        // w/o freqs, skipping:
        final int noFreqsDocFreq = leftTermsEnum.docFreq();
        lhsDocs = leftTermsEnum.postings(lhsDocs, PostingsEnum.NONE);
        rhsDocs = rightTermsEnum.postings(rhsDocs, PostingsEnum.NONE);
        assertDocsSkipping(noFreqsDocFreq, lhsDocs, rhsDocs);
    }
    // The right enum must not have any terms left over.
    assertNull(rightTermsEnum.next());
}
Also used : PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

PostingsEnum (org.apache.lucene.index.PostingsEnum)74 BytesRef (org.apache.lucene.util.BytesRef)55 TermsEnum (org.apache.lucene.index.TermsEnum)51 Terms (org.apache.lucene.index.Terms)43 Fields (org.apache.lucene.index.Fields)18 LeafReader (org.apache.lucene.index.LeafReader)17 Term (org.apache.lucene.index.Term)16 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)15 Document (org.apache.lucene.document.Document)13 ArrayList (java.util.ArrayList)11 IndexReader (org.apache.lucene.index.IndexReader)10 TextField (org.apache.lucene.document.TextField)9 Directory (org.apache.lucene.store.Directory)9 Bits (org.apache.lucene.util.Bits)9 IOException (java.io.IOException)8 DirectoryReader (org.apache.lucene.index.DirectoryReader)7 IndexWriter (org.apache.lucene.index.IndexWriter)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)5