Search in sources:

Example 76 with FieldType

Use of org.apache.lucene.document.FieldType in the project lucene-solr by Apache.

The example below is taken from the class TestOrdValues, method addDoc.

/**
 * Adds one document to the given writer. Document i carries the value
 * {@code i + 1}, which is used both as its id string and as the numeric
 * value driving function scoring.
 */
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
    int idAndScore = i + 1;
    Document doc = new Document();
    // id field: stored, untokenized, norms omitted
    FieldType idFieldType = new FieldType(TextField.TYPE_STORED);
    idFieldType.setTokenized(false);
    idFieldType.setOmitNorms(true);
    // for debug purposes
    doc.add(newField(ID_FIELD, id2String(idAndScore), idFieldType));
    doc.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(idAndScore))));
    // text field: indexed but not stored, norms omitted
    FieldType textFieldType = new FieldType(TextField.TYPE_NOT_STORED);
    textFieldType.setOmitNorms(true);
    // for regular search
    doc.add(newField(TEXT_FIELD, "text of doc" + idAndScore + textLine(i), textFieldType));
    // for function scoring
    doc.add(new LegacyIntField(INT_FIELD, idAndScore, Store.YES));
    doc.add(new NumericDocValuesField(INT_FIELD, idAndScore));
    // for function scoring (float value stored as its raw int bits)
    doc.add(new LegacyFloatField(FLOAT_FIELD, idAndScore, Store.YES));
    doc.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(idAndScore)));
    iw.addDocument(doc);
    log("added: " + doc);
}
Also used : LegacyFloatField(org.apache.solr.legacy.LegacyFloatField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LegacyIntField(org.apache.solr.legacy.LegacyIntField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LegacyFloatField(org.apache.solr.legacy.LegacyFloatField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) FieldType(org.apache.lucene.document.FieldType) LegacyIntField(org.apache.solr.legacy.LegacyIntField)

Example 77 with FieldType

Use of org.apache.lucene.document.FieldType in the project lucene-solr by Apache.

The example below is taken from the class PreAnalyzedUpdateProcessor, method mutate.

@Override
protected SolrInputField mutate(SolrInputField src) {
    // Unknown fields are dropped from the document entirely.
    SchemaField schemaField = schema.getFieldOrNull(src.getName());
    if (schemaField == null) {
        return null;
    }
    // A null field type means the field is neither indexed nor stored,
    // so there is nothing to pre-analyze.
    FieldType type = PreAnalyzedField.createFieldType(schemaField);
    if (type == null) {
        return null;
    }
    SolrInputField result = new SolrInputField(src.getName());
    for (Object value : src) {
        if (value == null) {
            continue;
        }
        Field preAnalyzed = (Field) parser.createField(schemaField, value);
        if (preAnalyzed == null) {
            // parse failure: keep the raw value untouched
            log.warn("Could not parse field {} - using original value as is: {}", src.getName(), value);
            result.addValue(value);
        } else {
            result.addValue(preAnalyzed);
        }
    }
    return result;
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) SolrInputField(org.apache.solr.common.SolrInputField) SchemaField(org.apache.solr.schema.SchemaField) Field(org.apache.lucene.document.Field) PreAnalyzedField(org.apache.solr.schema.PreAnalyzedField) SolrInputField(org.apache.solr.common.SolrInputField) FieldType(org.apache.lucene.document.FieldType)

Example 78 with FieldType

Use of org.apache.lucene.document.FieldType in the project lucene-solr by Apache.

The example below is taken from the class TestTeeSinkTokenFilter, method testEndOffsetPositionWithTeeSinkTokenFilter.

// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(analyzer));
    // Split a single token stream into a tee and its sink, then index both
    // into the same field so the one token appears twice.
    TokenStream source = analyzer.tokenStream("field", "abcd   ");
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(source);
    TokenStream sink = tee.newSinkTokenStream();
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);
    Document doc = new Document();
    doc.add(new Field("field", tee, vectorType));
    doc.add(new Field("field", sink, vectorType));
    writer.addDocument(doc);
    writer.close();
    // One unique term with freq 2; the second instance's offsets must
    // continue past the end of the first instance, not restart at 0.
    IndexReader reader = DirectoryReader.open(directory);
    Terms vector = reader.getTermVectors(0).terms("field");
    assertEquals(1, vector.size());
    TermsEnum termsEnum = vector.iterator();
    termsEnum.next();
    assertEquals(2, termsEnum.totalTermFreq());
    PostingsEnum positions = termsEnum.postings(null, PostingsEnum.ALL);
    assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals(2, positions.freq());
    positions.nextPosition();
    assertEquals(0, positions.startOffset());
    assertEquals(4, positions.endOffset());
    positions.nextPosition();
    assertEquals(8, positions.startOffset());
    assertEquals(12, positions.endOffset());
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
    reader.close();
    directory.close();
    analyzer.close();
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Terms(org.apache.lucene.index.Terms) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TermsEnum(org.apache.lucene.index.TermsEnum) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) PostingsEnum(org.apache.lucene.index.PostingsEnum) Directory(org.apache.lucene.store.Directory)

Example 79 with FieldType

Use of org.apache.lucene.document.FieldType in the project lucene-solr by Apache.

The example below is taken from the class Test2BPostingsBytes, method test.

// Stress test: builds an index whose postings exceed 2 billion bytes by
// indexing 1000 docs, then multiplying that index 1000x and 2000x via
// addIndexesSlowly. Extremely slow; run only as a nightly/monster test.
public void test() throws Exception {
    IndexWriterConfig defaultConfig = new IndexWriterConfig(null);
    Codec defaultCodec = defaultConfig.getCodec();
    // Skip when a randomly-chosen compressing codec uses tiny chunks:
    // per the assume message below, maxDocsPerChunk * blockSize < 16 can
    // trigger OOM at -Dtests.heapsize=30g.
    if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) {
        Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)");
        Matcher matcher = regex.matcher(defaultCodec.toString());
        assertTrue("Unexpected CompressingCodec toString() output: " + defaultCodec.toString(), matcher.find());
        int maxDocsPerChunk = Integer.parseInt(matcher.group(1));
        int blockSize = Integer.parseInt(matcher.group(2));
        int product = maxDocsPerChunk * blockSize;
        assumeTrue(defaultCodec.getName() + " maxDocsPerChunk (" + maxDocsPerChunk + ") * blockSize (" + blockSize + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g", product >= 16);
    }
    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1"));
    if (dir instanceof MockDirectoryWrapper) {
        // throttling would make this already-slow test far slower
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    // Large RAM buffer, no auto-flush, concurrent merges: tuned for bulk indexing.
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy(false, 10)).setOpenMode(IndexWriterConfig.OpenMode.CREATE).setCodec(TestUtil.getDefaultCodec()));
    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
        // 1 petabyte:
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }
    // One reusable document with a synthetic token stream; freqs are kept
    // but norms are omitted to keep the index as small as possible.
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    ft.setOmitNorms(true);
    MyTokenStream tokenStream = new MyTokenStream();
    Field field = new Field("field", tokenStream, ft);
    doc.add(field);
    final int numDocs = 1000;
    for (int i = 0; i < numDocs; i++) {
        if (i % 2 == 1) {
            // trick blockPF's little optimization
            tokenStream.n = 65536;
        } else {
            tokenStream.n = 65537;
        }
        w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();
    // Multiply the 1000-doc index by 1000 (=> one million docs) by adding
    // the same reader 1000 times into a second index.
    DirectoryReader oneThousand = DirectoryReader.open(dir);
    DirectoryReader[] subReaders = new DirectoryReader[1000];
    Arrays.fill(subReaders, oneThousand);
    BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2"));
    if (dir2 instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w2, subReaders);
    w2.forceMerge(1);
    w2.close();
    oneThousand.close();
    // Multiply again by 2000 (=> two billion docs' worth of postings bytes).
    DirectoryReader oneMillion = DirectoryReader.open(dir2);
    subReaders = new DirectoryReader[2000];
    Arrays.fill(subReaders, oneMillion);
    BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3"));
    if (dir3 instanceof MockDirectoryWrapper) {
        ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null));
    TestUtil.addIndexesSlowly(w3, subReaders);
    w3.forceMerge(1);
    w3.close();
    oneMillion.close();
    dir.close();
    dir2.close();
    dir3.close();
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) Pattern(java.util.regex.Pattern) CompressingCodec(org.apache.lucene.codecs.compressing.CompressingCodec) Matcher(java.util.regex.Matcher) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CompressingCodec(org.apache.lucene.codecs.compressing.CompressingCodec) Codec(org.apache.lucene.codecs.Codec) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BaseDirectoryWrapper(org.apache.lucene.store.BaseDirectoryWrapper)

Example 80 with FieldType

Use of org.apache.lucene.document.FieldType in the project lucene-solr by Apache.

The example below is taken from the class Test2BTerms, method test2BTerms.

// Stress test: indexes more than Integer.MAX_VALUE distinct terms, then
// verifies a sample of them are searchable and that CheckIndex reports a
// term count above 2^31 - 1. Extremely slow monster test.
public void test2BTerms() throws IOException {
    System.out.println("Starting Test2B");
    final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
    final int TERMS_PER_DOC = TestUtil.nextInt(random(), 100000, 1000000);
    List<BytesRef> savedTerms = null;
    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BTerms"));
    //MockDirectoryWrapper dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
    if (dir instanceof MockDirectoryWrapper) {
        // throttling would make this already-slow test far slower
        ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    }
    // don't double-checkindex
    dir.setCheckIndexOnClose(false);
    if (true) {
        // Bulk-indexing configuration: big RAM buffer, no auto-flush,
        // concurrent merges, fresh index.
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy(false, 10)).setOpenMode(IndexWriterConfig.OpenMode.CREATE).setCodec(TestUtil.getDefaultCodec()));
        MergePolicy mp = w.getConfig().getMergePolicy();
        if (mp instanceof LogByteSizeMergePolicy) {
            // 1 petabyte:
            ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
        }
        // One reusable doc with a synthetic random-term stream; docs-only
        // postings and no norms keep the index minimal.
        Document doc = new Document();
        final MyTokenStream ts = new MyTokenStream(random(), TERMS_PER_DOC);
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setIndexOptions(IndexOptions.DOCS);
        customType.setOmitNorms(true);
        Field field = new Field("field", ts, customType);
        doc.add(field);
        //w.setInfoStream(System.out);
        final int numDocs = (int) (TERM_COUNT / TERMS_PER_DOC);
        System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
        System.out.println("numDocs=" + numDocs);
        for (int i = 0; i < numDocs; i++) {
            final long t0 = System.currentTimeMillis();
            w.addDocument(doc);
            System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis() - t0) + " msec");
        }
        // The stream remembers a sample of the terms it produced, so we can
        // look them up again after the index is built.
        savedTerms = ts.savedTerms;
        System.out.println("TEST: full merge");
        w.forceMerge(1);
        System.out.println("TEST: close writer");
        w.close();
    }
    System.out.println("TEST: open reader");
    final IndexReader r = DirectoryReader.open(dir);
    if (savedTerms == null) {
        savedTerms = findTerms(r);
    }
    final int numSavedTerms = savedTerms.size();
    // The last terms saved have ordinals past 2^31 - 1; test those first.
    final List<BytesRef> bigOrdTerms = new ArrayList<>(savedTerms.subList(numSavedTerms - 10, numSavedTerms));
    System.out.println("TEST: test big ord terms...");
    testSavedTerms(r, bigOrdTerms);
    System.out.println("TEST: test all saved terms...");
    testSavedTerms(r, savedTerms);
    r.close();
    System.out.println("TEST: now CheckIndex...");
    CheckIndex.Status status = TestUtil.checkIndex(dir);
    final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
    assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);
    dir.close();
    System.out.println("TEST: done!");
}
Also used : MockDirectoryWrapper(org.apache.lucene.store.MockDirectoryWrapper) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BaseDirectoryWrapper(org.apache.lucene.store.BaseDirectoryWrapper) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

FieldType (org.apache.lucene.document.FieldType)262 Document (org.apache.lucene.document.Document)229 Field (org.apache.lucene.document.Field)191 Directory (org.apache.lucene.store.Directory)172 TextField (org.apache.lucene.document.TextField)166 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)125 StringField (org.apache.lucene.document.StringField)72 StoredField (org.apache.lucene.document.StoredField)65 IndexReader (org.apache.lucene.index.IndexReader)49 IndexWriter (org.apache.lucene.index.IndexWriter)49 BytesRef (org.apache.lucene.util.BytesRef)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 IndexSearcher (org.apache.lucene.search.IndexSearcher)45 Term (org.apache.lucene.index.Term)40 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)38 RAMDirectory (org.apache.lucene.store.RAMDirectory)37 TermQuery (org.apache.lucene.search.TermQuery)33 TopDocs (org.apache.lucene.search.TopDocs)32 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)30 Analyzer (org.apache.lucene.analysis.Analyzer)27