Search in sources :

Example 1 with IndexableFieldToXContent

Use of org.xbib.elasticsearch.action.skywalker.support.IndexableFieldToXContent in the project elasticsearch-skywalker by jprante.

The method reconstruct of the class DocumentReconstructor.

/**
 * Reconstructs the live (non-deleted) documents of an index shard.
 *
 * For every live document the result contains a "fields" array with the
 * document's stored fields and a "terms" array with each indexed term that
 * occurs in the document, together with its positions, start/end offsets
 * and frequency.
 *
 * @param shardId the shard id; written unchanged to the "shardId" field of the result
 * @return the builder holding the reconstructed documents
 * @throws IOException if reading from the index or writing to the builder fails
 */
public XContentBuilder reconstruct(int shardId) throws IOException {
    XContentBuilder builder = jsonBuilder();
    builder.startObject().field("shardId", shardId).field("numDeletions", reader.numDeletedDocs());
    builder.startArray("docs");
    FieldInfos fieldInfos = reader.getFieldInfos();
    // A null Bits instance is treated as "no document is deleted".
    Bits live = MultiFields.getLiveDocs(reader);
    int maxDoc = reader.maxDoc();
    for (int docNum = 0; docNum < maxDoc; docNum++) {
        // The live bit is set for documents that are NOT deleted, so skip the
        // document only when its bit is clear. Check before loading the stored
        // fields to avoid reading documents that are thrown away anyway.
        if (live != null && !live.get(docNum)) {
            // deleted
            continue;
        }
        Document doc = reader.document(docNum);
        builder.startObject().startArray("fields");
        if (fieldInfos != null) {
            for (FieldInfo fi : fieldInfos) {
                IndexableField[] fs = doc.getFields(fi.name);
                if (fs == null) {
                    continue;
                }
                for (IndexableField f : fs) {
                    IndexableFieldToXContent x = new IndexableFieldToXContent().field(f);
                    x.toXContent(builder, ToXContent.EMPTY_PARAMS);
                }
            }
        }
        builder.endArray();
        builder.startArray("terms");
        if (fieldInfos != null) {
            // Both enums are handed back to Lucene for reuse across terms and fields.
            TermsEnum te = null;
            DocsAndPositionsEnum dpe = null;
            for (FieldInfo fi : fieldInfos) {
                Terms terms = MultiFields.getTerms(reader, fi.name);
                if (terms == null) {
                    // no terms in this field
                    continue;
                }
                te = terms.iterator(te);
                while (te.next() != null) {
                    // Offsets are read below, so they must be requested explicitly;
                    // with flags == 0 the codec is not obliged to provide them.
                    DocsAndPositionsEnum newDpe = te.docsAndPositions(live, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
                    if (newDpe == null) {
                        // no position info for this field
                        break;
                    }
                    dpe = newDpe;
                    if (dpe.advance(docNum) != docNum) {
                        // no data for this term in this doc
                        continue;
                    }
                    String text = te.term().utf8ToString();
                    int freq = dpe.freq();
                    List<Integer> positions = new ArrayList<Integer>(freq);
                    List<Integer> starts = new ArrayList<Integer>(freq);
                    List<Integer> ends = new ArrayList<Integer>(freq);
                    for (int k = 0; k < freq; k++) {
                        // nextPosition() must be called before the offsets of that position are read
                        positions.add(dpe.nextPosition());
                        starts.add(dpe.startOffset());
                        ends.add(dpe.endOffset());
                    }
                    builder.startObject().field("text", text).field("positions", positions).field("starts", starts).field("ends", ends).field("count", freq).endObject();
                }
            }
        }
        builder.endArray();
        builder.endObject();
    }
    builder.endArray();
    builder.endObject();
    return builder;
}
Also used : Terms(org.apache.lucene.index.Terms) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) TermsEnum(org.apache.lucene.index.TermsEnum) FieldInfos(org.apache.lucene.index.FieldInfos) IndexableField(org.apache.lucene.index.IndexableField) IndexableFieldToXContent(org.xbib.elasticsearch.action.skywalker.support.IndexableFieldToXContent) DocsAndPositionsEnum(org.apache.lucene.index.DocsAndPositionsEnum) Bits(org.apache.lucene.util.Bits) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

ArrayList (java.util.ArrayList)1 Document (org.apache.lucene.document.Document)1 DocsAndPositionsEnum (org.apache.lucene.index.DocsAndPositionsEnum)1 FieldInfo (org.apache.lucene.index.FieldInfo)1 FieldInfos (org.apache.lucene.index.FieldInfos)1 IndexableField (org.apache.lucene.index.IndexableField)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 Bits (org.apache.lucene.util.Bits)1 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)1 IndexableFieldToXContent (org.xbib.elasticsearch.action.skywalker.support.IndexableFieldToXContent)1