Use of org.apache.lucene.index.Payload in project Jackrabbit by Apache.
The snippet below is from the class AbstractIndex, method getFinishedDocument.
/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped-down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *         queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (!Util.isDocumentReady(doc)) {
        Document copy = new Document();
        // mark the copy to indicate that reindexing is required
        copy.add(new Field(FieldNames.REINDEXING_REQUIRED, false, "",
                Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS,
                Field.TermVector.NO));
        for (Fieldable f : doc.getFields()) {
            Fieldable field = null;
            Field.TermVector tv = getTermVectorParameter(f);
            Field.Store stored = f.isStored() ? Field.Store.YES : Field.Store.NO;
            Field.Index indexed = getIndexParameter(f);
            if (f instanceof LazyTextExtractorField || f.readerValue() != null) {
                // replace all readers with an empty string reader
                field = new Field(f.name(), new StringReader(""), tv);
            } else if (f.stringValue() != null) {
                field = new Field(f.name(), false, f.stringValue(), stored,
                        indexed, tv);
            } else if (f.isBinary()) {
                field = new Field(f.name(), f.getBinaryValue(), stored);
            } else if (f.tokenStreamValue() != null
                    && f.tokenStreamValue() instanceof SingletonTokenStream) {
                // read the single token and copy its term text and payload
                TokenStream tokenStream = f.tokenStreamValue();
                TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
                PayloadAttribute payloadAttribute = tokenStream.addAttribute(PayloadAttribute.class);
                tokenStream.incrementToken();
                String value = new String(termAttribute.termBuffer(), 0,
                        termAttribute.termLength());
                tokenStream.reset();
                // clone the Payload so the copy does not share mutable state
                // with the original field's token stream
                field = new Field(f.name(), new SingletonTokenStream(value,
                        (Payload) payloadAttribute.getPayload().clone()));
            }
            if (field != null) {
                field.setOmitNorms(f.getOmitNorms());
                copy.add(field);
            }
        }
        // schedule the original document for later indexing
        Document existing = indexingQueue.addDocument(doc);
        if (existing != null) {
            // the queue already contained a pending document for this
            // node -> dispose of it
            Util.disposeDocument(existing);
        }
        // use the stripped-down copy for now
        doc = copy;
    }
    return doc;
}
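
For context, SingletonTokenStream is a Jackrabbit-internal TokenStream that emits exactly one term together with its Payload, which is why getFinishedDocument can extract the term text and payload with a single incrementToken() call. The following is a minimal sketch of such a stream against the Lucene 3.x attribute API; the class name and internals here are illustrative assumptions, not Jackrabbit's actual implementation.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;

// Illustrative single-token stream in the spirit of Jackrabbit's
// SingletonTokenStream (hypothetical name and fields).
public class SingleTermPayloadStream extends TokenStream {

    private final TermAttribute termAttr = addAttribute(TermAttribute.class);
    private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);

    private final String term;
    private final Payload payload;
    private boolean consumed = false;

    public SingleTermPayloadStream(String term, Payload payload) {
        this.term = term;
        this.payload = payload;
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (consumed) {
            return false;                 // the single token was already emitted
        }
        clearAttributes();
        termAttr.setTermBuffer(term);     // expose the term text
        payloadAttr.setPayload(payload);  // attach the per-term payload
        consumed = true;
        return true;
    }

    @Override
    public void reset() {
        consumed = false;                 // allow the token to be consumed again
    }
}

A caller could then consume it the same way getFinishedDocument does, for example:

TokenStream ts = new SingleTermPayloadStream("1", new Payload(new byte[]{0}));
ts.incrementToken();
Payload copy = (Payload) ts.getAttribute(PayloadAttribute.class).getPayload().clone();

Cloning matters here because Payload wraps a mutable byte array; without the defensive copy, the stripped-down document and the original queued for later indexing would share state.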