Search in sources:

Example 1 with Payload

Use of org.apache.lucene.index.Payload in the project jackrabbit by apache.

From the class AbstractIndex, method getFinishedDocument.

/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (Util.isDocumentReady(doc)) {
        return doc;
    }
    Document stripped = new Document();
    // flag the stripped copy so a later pass knows this node still needs re-indexing
    stripped.add(new Field(FieldNames.REINDEXING_REQUIRED, false, "",
            Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
    for (Fieldable original : doc.getFields()) {
        Field.TermVector termVector = getTermVectorParameter(original);
        Field.Store store = original.isStored() ? Field.Store.YES : Field.Store.NO;
        Field.Index index = getIndexParameter(original);
        Fieldable replacement = null;
        if (original instanceof LazyTextExtractorField || original.readerValue() != null) {
            // pending text extraction: substitute an empty reader for now
            replacement = new Field(original.name(), new StringReader(""), termVector);
        } else if (original.stringValue() != null) {
            replacement = new Field(original.name(), false,
                    original.stringValue(), store, index, termVector);
        } else if (original.isBinary()) {
            replacement = new Field(original.name(), original.getBinaryValue(), store);
        } else if (original.tokenStreamValue() instanceof SingletonTokenStream) {
            // single-token stream: pull out its term text and payload and
            // rebuild an equivalent stream for the copy
            TokenStream stream = original.tokenStreamValue();
            TermAttribute term = stream.addAttribute(TermAttribute.class);
            PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);
            stream.incrementToken();
            String termText = new String(term.termBuffer(), 0, term.termLength());
            stream.reset();
            replacement = new Field(original.name(),
                    new SingletonTokenStream(termText, (Payload) payload.getPayload().clone()));
        }
        if (replacement != null) {
            replacement.setOmitNorms(original.getOmitNorms());
            stripped.add(replacement);
        }
    }
    // schedule the original document for later (asynchronous) indexing
    Document pending = indexingQueue.addDocument(doc);
    if (pending != null) {
        // the queue already contained a pending document for this
        // node. -> dispose the document
        Util.disposeDocument(pending);
    }
    // use the stripped down copy for now
    return stripped;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) PayloadAttribute(org.apache.lucene.analysis.tokenattributes.PayloadAttribute) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) Fieldable(org.apache.lucene.document.Fieldable) StringReader(java.io.StringReader) TermAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute) Payload(org.apache.lucene.index.Payload)

Aggregations

StringReader (java.io.StringReader)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute)1 TermAttribute (org.apache.lucene.analysis.tokenattributes.TermAttribute)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 Fieldable (org.apache.lucene.document.Fieldable)1 Payload (org.apache.lucene.index.Payload)1