Search in sources :

Example 56 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

In class SimpleFragmentsBuilderTest, method makeUnstoredIndex:

/**
 * Writes a single-document index into {@code dir} (opened with {@code OpenMode.CREATE}) and
 * points {@code reader} at it.
 *
 * <p>The document has one field named {@code F} with the value {@code "aaa"}, built from
 * {@code TextField.TYPE_NOT_STORED} with term vectors, term vector offsets and term vector
 * positions enabled.
 *
 * @throws Exception if writing the index or (re)opening the reader fails
 */
protected void makeUnstoredIndex() throws Exception {
    // Replaces the legacy form:
    // new Field(F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);

    Document doc = new Document();
    doc.add(new Field(F, "aaa", customType));

    // try-with-resources: the writer is closed even when addDocument throws,
    // so a failure here no longer leaks the open writer.
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE))) {
        writer.addDocument(doc);
    }

    // Drop any reader left over from a previous index before opening the new one.
    if (reader != null) {
        reader.close();
    }
    reader = DirectoryReader.open(dir);
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 57 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

In class TestPostingsOffsets, method checkTokens:

// TODO: more tests with other possibilities
/**
 * Indexes one document holding two instances of the field {@code "body"}, each fed from a
 * {@link CannedTokenStream} over the given tokens, then closes the writer and the directory.
 *
 * <p>The field type indexes positions and offsets in the postings and also stores term
 * vectors with positions and offsets.
 *
 * @param field1 tokens for the first {@code "body"} instance
 * @param field2 tokens for the second {@code "body"} instance
 * @throws IOException if indexing or closing fails
 */
private void checkTokens(Token[] field1, Token[] field2) throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
    boolean finished = false;
    try {
        FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
        offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        // store some term vectors for the checkindex cross-check
        offsetsType.setStoreTermVectors(true);
        offsetsType.setStoreTermVectorPositions(true);
        offsetsType.setStoreTermVectorOffsets(true);

        Field firstBody = new Field("body", new CannedTokenStream(field1), offsetsType);
        Field secondBody = new Field("body", new CannedTokenStream(field2), offsetsType);

        Document document = new Document();
        document.add(firstBody);
        document.add(secondBody);

        writer.addDocument(document);
        writer.close();
        finished = true;
    } finally {
        if (!finished) {
            // Something already failed: close quietly so the original exception is not masked.
            IOUtils.closeWhileHandlingException(writer, directory);
        } else {
            // The writer is already closed; only the directory remains.
            IOUtils.close(directory);
        }
    }
}
Also used : StringField(org.apache.lucene.document.StringField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Directory(org.apache.lucene.store.Directory) FieldType(org.apache.lucene.document.FieldType)

Example 58 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

In class TestIndexSorting, method testRandom2:

/**
 * Indexes the same documents twice and checks that the two resulting readers are equal:
 * once pre-sorted by id with no index sort, and once shuffled with an index-time sort on
 * the {@code "numeric"} field followed by {@code forceMerge(1)}.
 *
 * <p>Both writers are driven by {@link Random} instances created from the same seed.
 */
public void testRandom2() throws Exception {
    int numDocs = atLeast(100);
    // Field type for the "positions" field: postings carry positions and offsets.
    FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
    POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    POSITIONS_TYPE.freeze();
    // Field type for the "term_vectors" field: term vectors enabled.
    FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
    TERM_VECTORS_TYPE.setStoreTermVectors(true);
    TERM_VECTORS_TYPE.freeze();
    // Analyzer whose components consist of a single MockTokenizer.
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            return new TokenStreamComponents(tokenizer, tokenizer);
        }
    };
    // Build all documents up front; every field value is derived from the document's id.
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        // ids are multiples of 10, created in ascending order
        int id = i * 10;
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(id), Store.YES));
        doc.add(new StringField("docs", "#all#", Store.NO));
        PositionsTokenStream positions = new PositionsTokenStream();
        positions.setId(id);
        doc.add(new Field("positions", positions, POSITIONS_TYPE));
        doc.add(new NumericDocValuesField("numeric", id));
        // The id repeated id times, separated by spaces (empty string for id 0)
        String value = IntStream.range(0, id).mapToObj(k -> Integer.toString(id)).collect(Collectors.joining(" "));
        TextField norms = new TextField("norms", value, Store.NO);
        doc.add(norms);
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
        // Multi-valued doc values fields hold two values each: id and id + 1
        doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id))));
        doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
        doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
        doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
        doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
        // Encode the id into 4 sortable bytes for the point field
        byte[] bytes = new byte[4];
        NumericUtils.intToSortableBytes(id, bytes, 0);
        doc.add(new BinaryPoint("points", bytes));
        docs.add(doc);
    }
    // Must use the same seed for both RandomIndexWriters so they behave identically
    long seed = random().nextLong();
    // We add documents already in ID order for the first writer:
    Directory dir1 = newFSDirectory(createTempDir());
    Random random1 = new Random(seed);
    IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
    // for testing norms field
    iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity()));
    // preserve docIDs
    iwc1.setMergePolicy(newLogMergePolicy());
    if (VERBOSE) {
        System.out.println("TEST: now index pre-sorted");
    }
    RandomIndexWriter w1 = new RandomIndexWriter(random1, dir1, iwc1);
    for (Document doc : docs) {
        // Re-set the id on the document's token stream before each add.
        // NOTE(review): presumably the stream must be re-primed because it is consumed per addDocument — confirm against PositionsTokenStream.
        ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
        w1.addDocument(doc);
    }
    // We shuffle documents, but set index sort, for the second writer:
    Directory dir2 = newFSDirectory(createTempDir());
    Random random2 = new Random(seed);
    IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
    // for testing norms field
    iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity()));
    // Index-time sort on the "numeric" doc values field, which holds the id
    Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
    iwc2.setIndexSort(sort);
    Collections.shuffle(docs, random());
    if (VERBOSE) {
        System.out.println("TEST: now index with index-time sorting");
    }
    RandomIndexWriter w2 = new RandomIndexWriter(random2, dir2, iwc2);
    int count = 0;
    // Random document count at which to commit part-way through indexing
    int commitAtCount = TestUtil.nextInt(random(), 1, numDocs - 1);
    for (Document doc : docs) {
        // Same token-stream reset as for the first writer
        ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
        if (count++ == commitAtCount) {
            // Ensure forceMerge really does merge
            w2.commit();
        }
        w2.addDocument(doc);
    }
    if (VERBOSE) {
        System.out.println("TEST: now force merge");
    }
    w2.forceMerge(1);
    DirectoryReader r1 = w1.getReader();
    DirectoryReader r2 = w2.getReader();
    if (VERBOSE) {
        System.out.println("TEST: now compare r1=" + r1 + " r2=" + r2);
    }
    // The force-merged index must report the configured index sort ...
    assertEquals(sort, getOnlyLeafReader(r2).getMetaData().getSort());
    // ... and must be equal to the index that was fed documents in id order.
    assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
    IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
Also used : Query(org.apache.lucene.search.Query) ScoreDoc(org.apache.lucene.search.ScoreDoc) BinaryPoint(org.apache.lucene.document.BinaryPoint) FieldType(org.apache.lucene.document.FieldType) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericUtils(org.apache.lucene.util.NumericUtils) Random(java.util.Random) StoredField(org.apache.lucene.document.StoredField) FieldDoc(org.apache.lucene.search.FieldDoc) FilterCodec(org.apache.lucene.codecs.FilterCodec) NO_MORE_DOCS(org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS) Document(org.apache.lucene.document.Document) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) SortField(org.apache.lucene.search.SortField) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Set(java.util.Set) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Collectors(java.util.stream.Collectors) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PointsFormat(org.apache.lucene.codecs.PointsFormat) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) IndexSearcher(org.apache.lucene.search.IndexSearcher) IntStream(java.util.stream.IntStream) 
PointsReader(org.apache.lucene.codecs.PointsReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) StringField(org.apache.lucene.document.StringField) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TestUtil(org.apache.lucene.util.TestUtil) HashMap(java.util.HashMap) FixedBitSet(org.apache.lucene.util.FixedBitSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Similarity(org.apache.lucene.search.similarities.Similarity) Store(org.apache.lucene.document.Field.Store) IntPoint(org.apache.lucene.document.IntPoint) TermStatistics(org.apache.lucene.search.TermStatistics) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) TopDocs(org.apache.lucene.search.TopDocs) TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Analyzer(org.apache.lucene.analysis.Analyzer) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) Consumer(java.util.function.Consumer) PointsWriter(org.apache.lucene.codecs.PointsWriter) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermQuery(org.apache.lucene.search.TermQuery) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Collections(java.util.Collections) BinaryPoint(org.apache.lucene.document.BinaryPoint) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) 
SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Random(java.util.Random) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) Sort(org.apache.lucene.search.Sort) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField)

Example 59 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

In class TestIndexWriter, method testNoUnwantedTVFiles:

/**
 * Indexes two documents with four large text fields each, then checks that the index is
 * consistent, has no unreferenced files, and that no segment reports term vectors.
 *
 * <p>None of the field types built in this method enables term vectors.
 *
 * @throws Exception if indexing, checking or reading the index fails
 */
public void testNoUnwantedTVFiles() throws Exception {
    Directory dir = newDirectory();
    IndexWriter indexWriter = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setRAMBufferSizeMB(0.01)
            .setMergePolicy(newLogMergePolicy()));
    // Never use compound files for merged segments.
    indexWriter.getConfig().getMergePolicy().setNoCFSRatio(0.0);

    // Large payload: the base string repeated four times.
    final String chunk = "alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
    final String big = chunk + chunk + chunk + chunk;

    FieldType storedNoNorms = new FieldType(TextField.TYPE_STORED);
    storedNoNorms.setOmitNorms(true);

    FieldType storedUntokenized = new FieldType(TextField.TYPE_STORED);
    storedUntokenized.setTokenized(false);

    FieldType storedUntokenizedNoNorms = new FieldType(TextField.TYPE_STORED);
    storedUntokenizedNoNorms.setTokenized(false);
    storedUntokenizedNoNorms.setOmitNorms(true);

    int docNumber = 0;
    while (docNumber < 2) {
        // Every field of a document carries the same value: the doc number followed by the payload.
        String value = Integer.toString(docNumber) + big;
        Document doc = new Document();
        doc.add(new Field("id", value, storedUntokenizedNoNorms));
        doc.add(new Field("str", value, storedUntokenized));
        doc.add(new Field("str2", value, storedTextType));
        doc.add(new Field("str3", value, storedNoNorms));
        indexWriter.addDocument(doc);
        docNumber++;
    }
    indexWriter.close();

    TestUtil.checkIndex(dir);
    assertNoUnreferencedFiles(dir, "no tv files");

    DirectoryReader indexReader = DirectoryReader.open(dir);
    for (LeafReaderContext leaf : indexReader.leaves()) {
        SegmentReader segment = (SegmentReader) leaf.reader();
        assertFalse(segment.getFieldInfos().hasVectors());
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) FieldType(org.apache.lucene.document.FieldType)

Example 60 with FieldType

use of org.apache.lucene.document.FieldType in project lucene-solr by apache.

In class TestExceedMaxTermLength, method test:

/**
 * Adds a document containing one field whose value has a random length between
 * {@code minTestTermLength} and {@code maxTestTermLegnth}, optionally surrounded by short
 * fields, and expects {@code addDocument} to throw an {@link IllegalArgumentException}.
 *
 * <p>The exception message must mention "immense term", {@code IndexWriter.MAX_TERM_LENGTH},
 * the offending field's name, and the original "bytes can be at most ... in length; got" text.
 *
 * @throws Exception if creating or closing the writer fails
 */
public void test() throws Exception {
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(random(), new MockAnalyzer(random())));
    try {
        final FieldType fieldType = new FieldType();
        fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        fieldType.setStored(random().nextBoolean());
        fieldType.freeze();

        final Document document = new Document();
        if (random().nextBoolean()) {
            // totally ok short field value
            String shortName = TestUtil.randomSimpleString(random(), 1, 10);
            String shortValue = TestUtil.randomSimpleString(random(), 1, 10);
            document.add(new Field(shortName, shortValue, fieldType));
        }

        // problematic field
        final String longFieldName = TestUtil.randomSimpleString(random(), 1, 50);
        final String longFieldValue = TestUtil.randomSimpleString(random(), minTestTermLength, maxTestTermLegnth);
        final Field longField = new Field(longFieldName, longFieldValue, fieldType);

        if (random().nextBoolean()) {
            // totally ok short field value
            String shortName = TestUtil.randomSimpleString(random(), 1, 10);
            String shortValue = TestUtil.randomSimpleString(random(), 1, 10);
            document.add(new Field(shortName, shortValue, fieldType));
        }
        document.add(longField);

        IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> w.addDocument(document));

        String maxLength = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
        String message = thrown.getMessage();
        boolean hasOriginalMessage = message.contains("bytes can be at most") && message.contains("in length; got");

        assertTrue("IllegalArgumentException didn't mention 'immense term': " + message, message.contains("immense term"));
        assertTrue("IllegalArgumentException didn't mention max length (" + maxLength + "): " + message, message.contains(maxLength));
        assertTrue("IllegalArgumentException didn't mention field name (" + longFieldName + "): " + message, message.contains(longFieldName));
        assertTrue("IllegalArgumentException didn't mention original message: " + message, hasOriginalMessage);
    } finally {
        w.close();
    }
}
Also used : Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType)

Aggregations

FieldType (org.apache.lucene.document.FieldType)262 Document (org.apache.lucene.document.Document)229 Field (org.apache.lucene.document.Field)191 Directory (org.apache.lucene.store.Directory)172 TextField (org.apache.lucene.document.TextField)166 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)125 StringField (org.apache.lucene.document.StringField)72 StoredField (org.apache.lucene.document.StoredField)65 IndexReader (org.apache.lucene.index.IndexReader)49 IndexWriter (org.apache.lucene.index.IndexWriter)49 BytesRef (org.apache.lucene.util.BytesRef)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 IndexSearcher (org.apache.lucene.search.IndexSearcher)45 Term (org.apache.lucene.index.Term)40 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)38 RAMDirectory (org.apache.lucene.store.RAMDirectory)37 TermQuery (org.apache.lucene.search.TermQuery)33 TopDocs (org.apache.lucene.search.TopDocs)32 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)30 Analyzer (org.apache.lucene.analysis.Analyzer)27