Search in sources :

Example 76 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class BaseTermVectorsFormatTestCase method testPostingsEnumAll.

public void testPostingsEnumAll() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document doc = new Document();
    Token token1 = new Token("bar", 0, 3);
    token1.setPayload(new BytesRef("pay1"));
    Token token2 = new Token("bar", 4, 7);
    token2.setPayload(new BytesRef("pay2"));
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setStoreTermVectorOffsets(true);
    doc.add(new Field("foo", new CannedTokenStream(token1, token2), ft));
    iw.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(iw);
    Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
    TermsEnum termsEnum = terms.iterator();
    assertNotNull(termsEnum);
    assertEquals(new BytesRef("bar"), termsEnum.next());
    // sugar method (FREQS)
    PostingsEnum postings = termsEnum.postings(null);
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    assertEquals(2, postings.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // termsenum reuse (FREQS)
    PostingsEnum postings2 = termsEnum.postings(postings);
    assertNotNull(postings2);
    // and it had better work
    assertEquals(-1, postings2.docID());
    assertEquals(0, postings2.nextDoc());
    assertEquals(2, postings2.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
    // asking for docs only: ok
    PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
    assertEquals(-1, docsOnly.docID());
    assertEquals(0, docsOnly.nextDoc());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
    // reuse that too
    PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
    assertNotNull(docsOnly2);
    // and it had better work
    assertEquals(-1, docsOnly2.docID());
    assertEquals(0, docsOnly2.nextDoc());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
    // asking for positions, ok
    PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
    assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
    assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // now reuse the positions
    PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
    assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
    assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    // payloads
    docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
    assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum.getPayload());
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
    assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // reuse
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
    assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum2.getPayload());
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
    assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum2.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(0, docsAndPositionsEnum.startOffset());
    assertEquals(3, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(4, docsAndPositionsEnum.startOffset());
    assertEquals(7, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // reuse
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(0, docsAndPositionsEnum2.startOffset());
    assertEquals(3, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(4, docsAndPositionsEnum2.startOffset());
    assertEquals(7, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(0, docsAndPositionsEnum.startOffset());
    assertEquals(3, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum.getPayload());
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(4, docsAndPositionsEnum.startOffset());
    assertEquals(7, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(0, docsAndPositionsEnum2.startOffset());
    assertEquals(3, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum2.getPayload());
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(4, docsAndPositionsEnum2.startOffset());
    assertEquals(7, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum2.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    iw.close();
    reader.close();
    dir.close();
}
Also used : Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 77 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class BasePostingsFormatTestCase method testPostingsEnumPayloads.

public void testPostingsEnumPayloads() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document doc = new Document();
    Token token1 = new Token("bar", 0, 3);
    token1.setPayload(new BytesRef("pay1"));
    Token token2 = new Token("bar", 4, 7);
    token2.setPayload(new BytesRef("pay2"));
    doc.add(new TextField("foo", new CannedTokenStream(token1, token2)));
    iw.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(iw);
    // sugar method (FREQS)
    PostingsEnum postings = getOnlyLeafReader(reader).postings(new Term("foo", "bar"));
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    assertEquals(2, postings.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // termsenum reuse (FREQS)
    TermsEnum termsEnum = getOnlyLeafReader(reader).terms("foo").iterator();
    termsEnum.seekExact(new BytesRef("bar"));
    PostingsEnum postings2 = termsEnum.postings(postings);
    assertNotNull(postings2);
    assertReused("foo", postings, postings2);
    // and it had better work
    assertEquals(-1, postings2.docID());
    assertEquals(0, postings2.nextDoc());
    assertEquals(2, postings2.freq());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
    // asking for docs only: ok
    PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
    assertEquals(-1, docsOnly.docID());
    assertEquals(0, docsOnly.nextDoc());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
    // reuse that too
    PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
    assertNotNull(docsOnly2);
    assertReused("foo", docsOnly, docsOnly2);
    // and it had better work
    assertEquals(-1, docsOnly2.docID());
    assertEquals(0, docsOnly2.nextDoc());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
    // asking for positions, ok
    PostingsEnum docsAndPositionsEnum = getOnlyLeafReader(reader).postings(new Term("foo", "bar"), PostingsEnum.POSITIONS);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // now reuse the positions
    PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
    assertReused("foo", docsAndPositionsEnum, docsAndPositionsEnum2);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    // payloads
    docsAndPositionsEnum = getOnlyLeafReader(reader).postings(new Term("foo", "bar"), PostingsEnum.PAYLOADS);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum.getPayload());
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // reuse
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
    assertReused("foo", docsAndPositionsEnum, docsAndPositionsEnum2);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum2.getPayload());
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum2.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    docsAndPositionsEnum = getOnlyLeafReader(reader).postings(new Term("foo", "bar"), PostingsEnum.OFFSETS);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    // reuse
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
    assertReused("foo", docsAndPositionsEnum, docsAndPositionsEnum2);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    // we don't define what it is, but if its something else, we should look into it?
    assertTrue(docsAndPositionsEnum2.getPayload() == null || new BytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    docsAndPositionsEnum = getOnlyLeafReader(reader).postings(new Term("foo", "bar"), PostingsEnum.ALL);
    assertNotNull(docsAndPositionsEnum);
    assertEquals(-1, docsAndPositionsEnum.docID());
    assertEquals(0, docsAndPositionsEnum.nextDoc());
    assertEquals(2, docsAndPositionsEnum.freq());
    assertEquals(0, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum.getPayload());
    assertEquals(1, docsAndPositionsEnum.nextPosition());
    assertEquals(-1, docsAndPositionsEnum.startOffset());
    assertEquals(-1, docsAndPositionsEnum.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
    docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
    assertReused("foo", docsAndPositionsEnum, docsAndPositionsEnum2);
    assertEquals(-1, docsAndPositionsEnum2.docID());
    assertEquals(0, docsAndPositionsEnum2.nextDoc());
    assertEquals(2, docsAndPositionsEnum2.freq());
    assertEquals(0, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay1"), docsAndPositionsEnum2.getPayload());
    assertEquals(1, docsAndPositionsEnum2.nextPosition());
    assertEquals(-1, docsAndPositionsEnum2.startOffset());
    assertEquals(-1, docsAndPositionsEnum2.endOffset());
    assertEquals(new BytesRef("pay2"), docsAndPositionsEnum2.getPayload());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
    iw.close();
    reader.close();
    dir.close();
}
Also used : TextField(org.apache.lucene.document.TextField) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 78 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class TestField method testTextFieldReader.

public void testTextFieldReader() throws Exception {
    Field field = new TextField("foo", new StringReader("bar"));
    trySetByteValue(field);
    trySetBytesValue(field);
    trySetBytesRefValue(field);
    trySetDoubleValue(field);
    trySetIntValue(field);
    trySetFloatValue(field);
    trySetLongValue(field);
    field.setReaderValue(new StringReader("foobar"));
    trySetShortValue(field);
    trySetStringValue(field);
    field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
    assertNotNull(field.readerValue());
}
Also used : StringReader(java.io.StringReader) Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 79 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class TestField method testTextFieldString.

public void testTextFieldString() throws Exception {
    Field[] fields = new Field[] { new TextField("foo", "bar", Field.Store.NO), new TextField("foo", "bar", Field.Store.YES) };
    for (Field field : fields) {
        trySetByteValue(field);
        trySetBytesValue(field);
        trySetBytesRefValue(field);
        trySetDoubleValue(field);
        trySetIntValue(field);
        trySetFloatValue(field);
        trySetLongValue(field);
        trySetReaderValue(field);
        trySetShortValue(field);
        field.setStringValue("baz");
        field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
        assertEquals("baz", field.stringValue());
    }
}
Also used : Token(org.apache.lucene.analysis.Token) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 80 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class TestMaxPosition method testMaxPosition.

public void testMaxPosition() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    Document doc = new Document();
    // This is at position 0:
    Token t1 = new Token("foo", 0, 3);
    if (random().nextBoolean()) {
        t1.setPayload(new BytesRef(new byte[] { 0x1 }));
    }
    Token t2 = new Token("foo", 4, 7);
    t2.setPositionIncrement(IndexWriter.MAX_POSITION);
    if (random().nextBoolean()) {
        t2.setPayload(new BytesRef(new byte[] { 0x1 }));
    }
    doc.add(new TextField("foo", new CannedTokenStream(new Token[] { t1, t2 })));
    iw.addDocument(doc);
    // Document should be visible:
    IndexReader r = DirectoryReader.open(iw);
    assertEquals(1, r.numDocs());
    PostingsEnum postings = MultiFields.getTermPositionsEnum(r, "foo", new BytesRef("foo"));
    // "foo" appears in docID=0
    assertEquals(0, postings.nextDoc());
    // "foo" appears 2 times in the doc
    assertEquals(2, postings.freq());
    // first at pos=0
    assertEquals(0, postings.nextPosition());
    // next at pos=MAX
    assertEquals(IndexWriter.MAX_POSITION, postings.nextPosition());
    r.close();
    iw.close();
    dir.close();
}
Also used : TextField(org.apache.lucene.document.TextField) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

Token (org.apache.lucene.analysis.Token)100 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)39 TokenStream (org.apache.lucene.analysis.TokenStream)31 Directory (org.apache.lucene.store.Directory)24 Test (org.junit.Test)23 Document (org.apache.lucene.document.Document)19 TextField (org.apache.lucene.document.TextField)19 BytesRef (org.apache.lucene.util.BytesRef)16 NamedList (org.apache.solr.common.util.NamedList)16 StringReader (java.io.StringReader)15 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)15 Analyzer (org.apache.lucene.analysis.Analyzer)14 ArrayList (java.util.ArrayList)13 Map (java.util.Map)13 Field (org.apache.lucene.document.Field)13 FieldType (org.apache.lucene.document.FieldType)11 IndexReader (org.apache.lucene.index.IndexReader)11 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)10 Tokenizer (org.apache.lucene.analysis.Tokenizer)9 Date (java.util.Date)8