Search in sources :

Example 76 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

From the class TestNumericDocValuesUpdates, method testDeleteUnusedUpdatesFiles:

/**
 * Checks that re-updating a numeric doc-values field and committing does not
 * grow the number of files in the directory, i.e. files left over from the
 * previous update of that field are deleted.
 */
@Test
public void testDeleteUnusedUpdatesFiles() throws Exception {
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

    // One document, addressable through its "id" term, carrying two numeric
    // doc-values fields.
    final Document seed = new Document();
    seed.add(new StringField("id", "d0", Store.NO));
    seed.add(new NumericDocValuesField("f1", 1L));
    seed.add(new NumericDocValuesField("f2", 1L));
    indexWriter.addDocument(seed);

    final Term docId = new Term("id", "d0");
    final String[] fieldNames = { "f1", "f2" };
    // Update each field twice to make sure all unneeded files are deleted.
    for (int i = 0; i < fieldNames.length; i++) {
        final String field = fieldNames[i];

        indexWriter.updateNumericDocValue(docId, field, 2L);
        indexWriter.commit();
        final int filesAfterFirstUpdate = directory.listAll().length;

        // Second update of the same field: the file count must stay the same
        // because the old field's generation is removed.
        indexWriter.updateNumericDocValue(docId, field, 3L);
        indexWriter.commit();
        final int filesAfterSecondUpdate = directory.listAll().length;

        assertEquals(filesAfterFirstUpdate, filesAfterSecondUpdate);
    }

    indexWriter.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Example 77 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

From the class TestPointQueries, method testAllPointDocsWereDeletedAndThenMergedAgain:

/**
 * Repeatedly indexes a point document, deletes it, and force-merges, so that
 * merging runs over segments whose point-carrying documents are all deleted.
 */
public void testAllPointDocsWereDeletedAndThenMergedAgain() throws Exception {
    final Directory directory = newDirectory();
    final IndexWriterConfig config = newIndexWriterConfig();
    config.setCodec(getCodec());
    final IndexWriter indexWriter = new IndexWriter(directory, config);
    final Term idTerm = new Term("id", "0");

    // First segment: one point document plus an empty one.
    final Document firstPointDoc = new Document();
    firstPointDoc.add(new StringField("id", "0", Field.Store.NO));
    firstPointDoc.add(new LongPoint("value", 0L));
    indexWriter.addDocument(firstPointDoc);
    // The empty document is never deleted; it keeps IW from dropping the
    // segment below for being 100% deleted.
    indexWriter.addDocument(new Document());
    indexWriter.commit();

    // Two identical rounds. Each one needs another segment so that
    // BKDWriter.merge is invoked, then deletes every point document and
    // merges down to a single segment.
    for (int round = 0; round < 2; round++) {
        final Document pointDoc = new Document();
        pointDoc.add(new StringField("id", "0", Field.Store.NO));
        pointDoc.add(new LongPoint("value", 0L));
        indexWriter.addDocument(pointDoc);
        indexWriter.addDocument(new Document());
        indexWriter.deleteDocuments(idTerm);
        indexWriter.forceMerge(1);
    }

    IOUtils.close(indexWriter, directory);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) LongPoint(org.apache.lucene.document.LongPoint) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 78 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

From the class TestDirectoryReader, method testGetFieldNames:

/**
 * Tests the field metadata reported by
 * {@code MultiFields.getMergedFieldInfos(reader)} for a {@link DirectoryReader}.
 *
 * <p>The index is built in two writer sessions and the merged field infos are
 * checked after each one: every field name must be present, and the fields
 * must be classified as expected into indexed, not indexed, and
 * term-vector-storing.
 *
 * @throws Exception on error
 */
public void testGetFieldNames() throws Exception {
    Directory d = newDirectory();
    // set up writer
    IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
    Document doc = new Document();
    // Field type with only "stored" switched on; the assertions below expect
    // fields of this type to be reported as not indexed.
    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    doc.add(new StringField("keyword", "test1", Field.Store.YES));
    doc.add(new TextField("text", "test1", Field.Store.YES));
    doc.add(new Field("unindexed", "test1", customType3));
    doc.add(new TextField("unstored", "test1", Field.Store.NO));
    writer.addDocument(doc);
    writer.close();
    // set up reader
    DirectoryReader reader = DirectoryReader.open(d);
    FieldInfos fieldInfos = MultiFields.getMergedFieldInfos(reader);
    assertNotNull(fieldInfos.fieldInfo("keyword"));
    assertNotNull(fieldInfos.fieldInfo("text"));
    assertNotNull(fieldInfos.fieldInfo("unindexed"));
    assertNotNull(fieldInfos.fieldInfo("unstored"));
    reader.close();
    // add more documents
    writer = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
    // want to get some more segments here
    int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor();
    for (int i = 0; i < 5 * mergeFactor; i++) {
        doc = new Document();
        doc.add(new StringField("keyword", "test1", Field.Store.YES));
        doc.add(new TextField("text", "test1", Field.Store.YES));
        doc.add(new Field("unindexed", "test1", customType3));
        doc.add(new TextField("unstored", "test1", Field.Store.NO));
        writer.addDocument(doc);
    }
    // new fields are in some different segments (we hope)
    for (int i = 0; i < 5 * mergeFactor; i++) {
        doc = new Document();
        doc.add(new StringField("keyword2", "test1", Field.Store.YES));
        doc.add(new TextField("text2", "test1", Field.Store.YES));
        doc.add(new Field("unindexed2", "test1", customType3));
        doc.add(new TextField("unstored2", "test1", Field.Store.NO));
        writer.addDocument(doc);
    }
    // new termvector fields: one type per combination of offsets/positions
    FieldType customType5 = new FieldType(TextField.TYPE_STORED);
    customType5.setStoreTermVectors(true);
    FieldType customType6 = new FieldType(TextField.TYPE_STORED);
    customType6.setStoreTermVectors(true);
    customType6.setStoreTermVectorOffsets(true);
    FieldType customType7 = new FieldType(TextField.TYPE_STORED);
    customType7.setStoreTermVectors(true);
    customType7.setStoreTermVectorPositions(true);
    FieldType customType8 = new FieldType(TextField.TYPE_STORED);
    customType8.setStoreTermVectors(true);
    customType8.setStoreTermVectorOffsets(true);
    customType8.setStoreTermVectorPositions(true);
    for (int i = 0; i < 5 * mergeFactor; i++) {
        doc = new Document();
        // "tvnot" is a plain TextField, i.e. the one field here that is added
        // without a term-vector-enabled type.
        doc.add(new TextField("tvnot", "tvnot", Field.Store.YES));
        doc.add(new Field("termvector", "termvector", customType5));
        doc.add(new Field("tvoffset", "tvoffset", customType6));
        doc.add(new Field("tvposition", "tvposition", customType7));
        doc.add(new Field("tvpositionoffset", "tvpositionoffset", customType8));
        writer.addDocument(doc);
    }
    writer.close();
    // verify fields again, bucketing every field name by its reported properties
    reader = DirectoryReader.open(d);
    fieldInfos = MultiFields.getMergedFieldInfos(reader);
    Collection<String> allFieldNames = new HashSet<>();
    Collection<String> indexedFieldNames = new HashSet<>();
    Collection<String> notIndexedFieldNames = new HashSet<>();
    Collection<String> tvFieldNames = new HashSet<>();
    for (FieldInfo fieldInfo : fieldInfos) {
        final String name = fieldInfo.name;
        allFieldNames.add(name);
        if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
            indexedFieldNames.add(name);
        } else {
            notIndexedFieldNames.add(name);
        }
        if (fieldInfo.hasVectors()) {
            tvFieldNames.add(name);
        }
    }
    // every field added in either writer session must be present
    assertTrue(allFieldNames.contains("keyword"));
    assertTrue(allFieldNames.contains("text"));
    assertTrue(allFieldNames.contains("unindexed"));
    assertTrue(allFieldNames.contains("unstored"));
    assertTrue(allFieldNames.contains("keyword2"));
    assertTrue(allFieldNames.contains("text2"));
    assertTrue(allFieldNames.contains("unindexed2"));
    assertTrue(allFieldNames.contains("unstored2"));
    assertTrue(allFieldNames.contains("tvnot"));
    assertTrue(allFieldNames.contains("termvector"));
    assertTrue(allFieldNames.contains("tvposition"));
    assertTrue(allFieldNames.contains("tvoffset"));
    assertTrue(allFieldNames.contains("tvpositionoffset"));
    // verify the indexed fields:
    // 6 original + the 5 fields from the term-vector documents
    assertEquals(11, indexedFieldNames.size());
    assertTrue(indexedFieldNames.contains("keyword"));
    assertTrue(indexedFieldNames.contains("text"));
    assertTrue(indexedFieldNames.contains("unstored"));
    assertTrue(indexedFieldNames.contains("keyword2"));
    assertTrue(indexedFieldNames.contains("text2"));
    assertTrue(indexedFieldNames.contains("unstored2"));
    assertTrue(indexedFieldNames.contains("tvnot"));
    assertTrue(indexedFieldNames.contains("termvector"));
    assertTrue(indexedFieldNames.contains("tvposition"));
    assertTrue(indexedFieldNames.contains("tvoffset"));
    assertTrue(indexedFieldNames.contains("tvpositionoffset"));
    // verify the fields that are not indexed: exactly the two stored-only fields
    assertEquals(2, notIndexedFieldNames.size());
    assertTrue(notIndexedFieldNames.contains("unindexed"));
    assertTrue(notIndexedFieldNames.contains("unindexed2"));
    // verify the term-vector fields: exactly four fields store term vectors.
    // Each one is checked by name so the count cannot be satisfied by the
    // wrong set of fields.
    assertEquals(tvFieldNames.toString(), 4, tvFieldNames.size());
    assertTrue(tvFieldNames.contains("termvector"));
    assertTrue(tvFieldNames.contains("tvoffset"));
    assertTrue(tvFieldNames.contains("tvposition"));
    assertTrue(tvFieldNames.contains("tvpositionoffset"));
    reader.close();
    d.close();
}
Also used : Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 79 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

From the class TestDocValuesIndexing, method testSameFieldNameForPostingAndDocValue:

/**
 * Adds a field name first with postings plus numeric doc values, then expects
 * a later attempt to add the same name with binary doc values to be rejected
 * with an {@link IllegalArgumentException}.
 */
public void testSameFieldNameForPostingAndDocValue() throws Exception {
    // LUCENE-5192: FieldInfos.Builder neglected to update
    // globalFieldNumbers.docValuesType map if the field existed, resulting in
    // potentially adding the same field with different DV types.
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

    // "f" enters the index both as an indexed string and as numeric doc values.
    final Document postingsAndNumeric = new Document();
    postingsAndNumeric.add(new StringField("f", "mock-value", Store.NO));
    postingsAndNumeric.add(new NumericDocValuesField("f", 5));
    indexWriter.addDocument(postingsAndNumeric);
    indexWriter.commit();

    // Re-using "f" with a different doc-values type must fail.
    final Document binaryOnly = new Document();
    binaryOnly.add(new BinaryDocValuesField("f", new BytesRef("mock")));
    expectThrows(IllegalArgumentException.class, () -> indexWriter.addDocument(binaryOnly));

    indexWriter.rollback();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 80 with StringField

use of org.apache.lucene.document.StringField in project lucene-solr by apache.

From the class TestDocValuesIndexing, method testMixedTypesAfterReopenAppend2:

/**
 * Writes "foo" with sorted-set doc values, reopens the index with a second
 * writer, and expects a document that gives "foo" binary doc values to be
 * rejected with an {@link IllegalArgumentException}.
 */
public void testMixedTypesAfterReopenAppend2() throws IOException {
    final Directory directory = newDirectory();

    // First writer session: "foo" carries sorted-set doc values.
    final IndexWriter firstWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
    final Document sortedSetDoc = new Document();
    sortedSetDoc.add(new SortedSetDocValuesField("foo", new BytesRef("foo")));
    firstWriter.addDocument(sortedSetDoc);
    firstWriter.close();

    // Second writer session on the same directory.
    final IndexWriter secondWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
    final Document conflictingDoc = new Document();
    conflictingDoc.add(new StringField("foo", "bar", Field.Store.NO));
    conflictingDoc.add(new BinaryDocValuesField("foo", new BytesRef("foo")));
    // NOTE: this case follows a different code path inside
    // DefaultIndexingChain/FieldInfos, because the field (foo)
    // is first added without DocValues:
    expectThrows(IllegalArgumentException.class, () -> secondWriter.addDocument(conflictingDoc));

    secondWriter.forceMerge(1);
    secondWriter.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

StringField (org.apache.lucene.document.StringField)323 Document (org.apache.lucene.document.Document)302 Directory (org.apache.lucene.store.Directory)227 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)129 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)94 Term (org.apache.lucene.index.Term)90 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)82 BytesRef (org.apache.lucene.util.BytesRef)73 IndexSearcher (org.apache.lucene.search.IndexSearcher)57 DirectoryReader (org.apache.lucene.index.DirectoryReader)56 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)55 ArrayList (java.util.ArrayList)54 TextField (org.apache.lucene.document.TextField)54 IndexReader (org.apache.lucene.index.IndexReader)51 Field (org.apache.lucene.document.Field)50 TermQuery (org.apache.lucene.search.TermQuery)50 IndexWriter (org.apache.lucene.index.IndexWriter)45 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)43 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)43 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)40