Search in sources :

Example 16 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestCompressingTermVectorsFormat method testChunkCleanup.

/**
 * Flushes five one-document segments, checking after each flush that every
 * segment reports exactly one chunk and that this chunk is dirty (incomplete),
 * then force-merges down to a single segment and checks that at most one
 * dirty chunk is left.
 */
public void testChunkCleanup() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    // no merging while the small segments are being written
    config.setMergePolicy(NoMergePolicy.INSTANCE);
    // the codec settings (e.g. maxDocsPerChunk) are pinned on purpose: without them
    // this test would not be guaranteed to produce dirty chunks.
    config.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
    IndexWriter writer = new IndexWriter(directory, config);
    DirectoryReader current = DirectoryReader.open(writer);

    int docsAdded = 0;
    while (docsAdded < 5) {
        Document document = new Document();
        FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
        vectorsType.setStoreTermVectors(true);
        document.add(new Field("text", "not very long at all", vectorsType));
        writer.addDocument(document);
        docsAdded++;

        // reopening the reader forces a flush of the document just added
        DirectoryReader refreshed = DirectoryReader.openIfChanged(current);
        assertNotNull(refreshed);
        current.close();
        current = refreshed;

        // every segment seen so far must hold a single, dirty chunk
        for (LeafReaderContext context : current.leaves()) {
            CodecReader segmentReader = (CodecReader) context.reader();
            CompressingTermVectorsReader vectorsReader =
                (CompressingTermVectorsReader) segmentReader.getTermVectorsReader();
            assertEquals(1, vectorsReader.getNumChunks());
            assertEquals(1, vectorsReader.getNumDirtyChunks());
        }
    }

    // switch merging back on and collapse everything into one segment
    writer.getConfig().setMergePolicy(newLogMergePolicy());
    writer.forceMerge(1);

    DirectoryReader merged = DirectoryReader.openIfChanged(current);
    assertNotNull(merged);
    current.close();
    current = merged;

    CodecReader mergedSegment = (CodecReader) getOnlyLeafReader(current);
    CompressingTermVectorsReader mergedVectors =
        (CompressingTermVectorsReader) mergedSegment.getTermVectorsReader();
    // zero dirty chunks is possible with some luck; one is the typical outcome.
    assertTrue(mergedVectors.getNumDirtyChunks() <= 1);

    current.close();
    writer.close();
    directory.close();
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CodecReader(org.apache.lucene.index.CodecReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 17 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestCompressingTermVectorsFormat method testNoOrds.

// See https://issues.apache.org/jira/browse/LUCENE-5156
// Checks that the terms enum of a term vector rejects both ord() and
// seekExact(long) with UnsupportedOperationException.
public void testNoOrds() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    Document document = new Document();
    document.add(new Field("foo", "this is a test", vectorsType));
    writer.addDocument(document);

    LeafReader leaf = getOnlyLeafReader(writer.getReader());
    Terms vector = leaf.getTermVector(0, "foo");
    assertNotNull(vector);

    TermsEnum cursor = vector.iterator();
    SeekStatus status = cursor.seekCeil(new BytesRef("this"));
    assertEquals(SeekStatus.FOUND, status);

    // asking for the ordinal of the current term must be refused
    boolean ordRejected = false;
    try {
        cursor.ord();
    } catch (UnsupportedOperationException expected) {
        ordRejected = true;
    }
    if (!ordRejected) {
        fail();
    }

    // seeking by ordinal must be refused as well
    boolean seekByOrdRejected = false;
    try {
        cursor.seekExact(0);
    } catch (UnsupportedOperationException expected) {
        seekByOrdRejected = true;
    }
    if (!seekByOrdRejected) {
        fail();
    }

    leaf.close();
    writer.close();
    directory.close();
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) FieldType(org.apache.lucene.document.FieldType) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 18 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestCompressingStoredFieldsFormat method testDeletePartiallyWrittenFilesIfAbort.

/**
 * Commits one valid document, then indexes a document whose stored field
 * reports a {@code null} string value. If that surfaces as an
 * {@link IllegalArgumentException}, it must be the writer's tragic exception;
 * in every case the writer is expected to be closed afterwards.
 */
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    int maxBufferedDocs = RandomNumbers.randomIntBetween(random(), 2, 30);
    config.setMaxBufferedDocs(maxBufferedDocs);
    config.setCodec(CompressingCodec.randomInstance(random()));
    // compound files are switched off because this test checks file names
    config.setMergePolicy(newLogMergePolicy(false));
    config.setUseCompoundFile(false);

    // A plain IndexWriter is used instead of RandomIndexWriter so that CFS stays off.
    IndexWriter writer = new IndexWriter(directory, config);

    Document goodDoc = new Document();
    goodDoc.add(new IntPoint("id", 0));
    goodDoc.add(new StoredField("id", 0));
    writer.addDocument(goodDoc);
    writer.commit();

    // Build a document for which #writeField fails, so that an abort is triggered.
    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);
    Field brokenField = new Field("invalid", storedOnly) {

        @Override
        public String stringValue() {
            // A null value aborts the segment!!  We should fix this.
            return null;
        }
    };
    Document badDoc = new Document();
    badDoc.add(brokenField);

    try {
        writer.addDocument(badDoc);
        writer.commit();
    } catch (IllegalArgumentException tragedy) {
        // expected: the failure must be recorded as the writer's tragic exception
        assertEquals(tragedy, writer.getTragicException());
    }

    // The tragedy should have closed the writer
    assertFalse(writer.isOpen());
    directory.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 19 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestSimilarity2 method testOmitTF.

/**
 * Indexes one document whose field uses {@code IndexOptions.DOCS} and checks
 * that, under every similarity in {@code sims}, a term query on it finds
 * exactly one hit.
 */
public void testOmitTF() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED);
    docsOnly.setIndexOptions(IndexOptions.DOCS);
    docsOnly.freeze();

    Document document = new Document();
    document.add(newField("foo", "bar", docsOnly));
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    for (Similarity similarity : sims) {
        searcher.setSimilarity(similarity);
        BooleanQuery.Builder builder = new BooleanQuery.Builder()
            .add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        assertEquals(1, searcher.search(builder.build(), 10).totalHits);
    }

    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 20 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

the class TestSimilarity2 method testNoNorms.

/**
 * Indexes one document whose field omits norms and checks that, under every
 * similarity in {@code sims}, a term query on it still finds exactly one hit.
 */
public void testNoNorms() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    FieldType normless = new FieldType(TextField.TYPE_NOT_STORED);
    normless.setOmitNorms(true);
    normless.freeze();

    Document document = new Document();
    Field field = newField("foo", "bar", normless);
    document.add(field);
    writer.addDocument(document);

    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    for (Similarity similarity : sims) {
        searcher.setSimilarity(similarity);
        TermQuery fooBar = new TermQuery(new Term("foo", "bar"));
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(fooBar, BooleanClause.Occur.SHOULD);
        BooleanQuery wrapped = builder.build();
        assertEquals(1, searcher.search(wrapped, 10).totalHits);
    }

    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) FieldType(org.apache.lucene.document.FieldType)

Aggregations

FieldType (org.apache.lucene.document.FieldType)283 Document (org.apache.lucene.document.Document)244 Field (org.apache.lucene.document.Field)209 Directory (org.apache.lucene.store.Directory)175 TextField (org.apache.lucene.document.TextField)168 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)125 StringField (org.apache.lucene.document.StringField)88 StoredField (org.apache.lucene.document.StoredField)77 BytesRef (org.apache.lucene.util.BytesRef)56 IndexWriter (org.apache.lucene.index.IndexWriter)53 IndexReader (org.apache.lucene.index.IndexReader)49 IndexSearcher (org.apache.lucene.search.IndexSearcher)46 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 Term (org.apache.lucene.index.Term)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)39 RAMDirectory (org.apache.lucene.store.RAMDirectory)37 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)34 TermQuery (org.apache.lucene.search.TermQuery)34 Analyzer (org.apache.lucene.analysis.Analyzer)33 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)33