Search in sources:

Example 21 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project elasticsearch-skywalker by jprante.

The example below is taken from the class TransportSkywalkerAction, method shardOperation.

@Override
protected ShardSkywalkerResponse shardOperation(ShardSkywalkerRequest request) throws ElasticsearchException {
    synchronized (mutex) {
        IndexService indexService = indicesService.indexServiceSafe(request.index());
        InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
        MapperService mapperService = indexService.mapperService();
        // Acquire a point-in-time searcher for this shard. It MUST be released
        // when we are done, otherwise the underlying reader reference leaks
        // (the original code never released it).
        Engine.Searcher searcher = indexShard.acquireSearcher("skywalker_action");
        try {
            IndexReader reader = searcher.reader();
            Skywalker skywalker = new Skywalker(reader);
            Map<String, Object> response = new HashMap<String, Object>();
            Directory directory = indexShard.store().directory();
            // Physical index files together with a human-readable description
            // of what each file is for.
            List<Map<String, Object>> indexFiles = new ArrayList<Map<String, Object>>();
            for (String f : skywalker.getIndexFiles(directory)) {
                Map<String, Object> indexFile = new HashMap<String, Object>();
                indexFile.put("name", f);
                indexFile.put("function", skywalker.getFileFunction(f));
                indexFiles.add(indexFile);
            }
            response.put("indexFiles", indexFiles);
            skywalker.getStoreMetadata(response, indexShard.store().getMetadata());
            response.put("indexVersion", skywalker.getVersion());
            response.put("directoryImpl", skywalker.getDirImpl());
            response.put("numDocs", reader.numDocs());
            response.put("maxDoc", reader.maxDoc());
            response.put("hasDeletions", reader.hasDeletions());
            response.put("numDeletedDocs", reader.numDeletedDocs());
            // NOTE(review): the returned Set<FieldTermCount> was never used, but
            // getFieldTermCounts() appears to walk the terms and presumably primes
            // getNumTerms() below — the call and its ordering are kept; confirm
            // before removing it entirely.
            skywalker.getFieldTermCounts();
            response.put("numTerms", skywalker.getNumTerms());
            // Index format/version capabilities.
            Map<String, Object> indexFormatInfo = new HashMap<String, Object>();
            FormatDetails details = skywalker.getFormatDetails();
            indexFormatInfo.put("version", details.getVersion());
            indexFormatInfo.put("genericName", details.getGenericName());
            indexFormatInfo.put("capabilities", details.getCapabilities());
            response.put("indexFormat", indexFormatInfo);
            // Per-segment statistics of the current commit point.
            List<Map<String, Object>> commits = new ArrayList<Map<String, Object>>();
            for (Segment segment : indexShard.engine().segments()) {
                Map<String, Object> m = new HashMap<String, Object>();
                m.put("segment", segment.getName());
                m.put("count", segment.getNumDocs());
                m.put("deleted", segment.getDeletedDocs());
                m.put("generation", segment.getGeneration());
                m.put("sizeInBytes", segment.getSizeInBytes());
                m.put("version", segment.getVersion());
                m.put("committed", segment.committed);
                m.put("compound", segment.compound);
                m.put("size", segment.getSize().toString());
                commits.add(m);
            }
            response.put("commits", commits);
            // Merged field infos across all segments, enriched with mapper data.
            List<Object> fieldInfos = new ArrayList<Object>();
            for (FieldInfo fi : MultiFields.getMergedFieldInfos(reader)) {
                fieldInfos.add(skywalker.getFieldInfo(mapperService, fi));
            }
            response.put("fieldInfos", fieldInfos);
            // Top 50 terms by document frequency.
            List<Map<String, Object>> termList = new ArrayList<Map<String, Object>>();
            for (TermStats ts : skywalker.getTopTerms(50)) {
                Map<String, Object> m = new HashMap<String, Object>();
                m.put("field", ts.field());
                m.put("text", ts.text());
                m.put("docFreq", ts.docFreq());
                termList.add(m);
            }
            response.put("topterms", termList);
            return new ShardSkywalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (Exception ex) {
            throw new ElasticsearchException(ex.getMessage(), ex);
        } finally {
            // Release the searcher acquired above (fixes the reader leak).
            searcher.release();
        }
    }
}
Also used : IndexService(org.elasticsearch.index.service.IndexService) Lists.newArrayList(org.elasticsearch.common.collect.Lists.newArrayList) ElasticsearchException(org.elasticsearch.ElasticsearchException) Skywalker(org.xbib.elasticsearch.skywalker.Skywalker) Segment(org.elasticsearch.index.engine.Segment) FieldTermCount(org.xbib.elasticsearch.skywalker.stats.FieldTermCount) FormatDetails(org.xbib.elasticsearch.skywalker.FormatDetails) Lists.newArrayList(org.elasticsearch.common.collect.Lists.newArrayList) Engine(org.elasticsearch.index.engine.Engine) Directory(org.apache.lucene.store.Directory) TermStats(org.xbib.elasticsearch.skywalker.stats.TermStats) InternalIndexShard(org.elasticsearch.index.shard.service.InternalIndexShard) ElasticsearchException(org.elasticsearch.ElasticsearchException) ClusterBlockException(org.elasticsearch.cluster.block.ClusterBlockException) BroadcastShardOperationFailedException(org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException) ShardOperationFailedException(org.elasticsearch.action.ShardOperationFailedException) DefaultShardOperationFailedException(org.elasticsearch.action.support.DefaultShardOperationFailedException) IndexReader(org.apache.lucene.index.IndexReader) MapperService(org.elasticsearch.index.mapper.MapperService) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 22 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project elasticsearch-skywalker by jprante.

The example below is taken from the class DocumentReconstructor, method reconstruct.

/**
     * Reconstruct the live documents of an index shard from their stored
     * fields plus the indexed term/position/offset data.
     *
     * @param shardId the id of the shard being reconstructed (echoed into the output)
     * @return a JSON builder containing the reconstructed documents
     * @throws IOException if reading the index fails
     */
public XContentBuilder reconstruct(int shardId) throws IOException {
    XContentBuilder builder = jsonBuilder();
    builder.startObject().field("shardId", shardId).field("numDeletions", reader.numDeletedDocs());
    builder.startArray("docs");
    FieldInfos fieldInfos = reader.getFieldInfos();
    Bits live = MultiFields.getLiveDocs(reader);
    for (int docNum = 0; docNum < reader.maxDoc(); docNum++) {
        // BUGFIX: live-docs bits are SET for live documents. The original test
        // (live.get(docNum) -> continue) skipped the live documents and kept
        // the deleted ones, which contradicts the postings traversal below
        // where 'live' is passed as the liveDocs filter (deleted docs can
        // never match there). Skip deleted documents instead.
        if (live != null && !live.get(docNum)) {
            continue;
        }
        // Only fetch stored fields after the liveness check (avoids wasted work).
        Document doc = reader.document(docNum);
        builder.startObject().startArray("fields");
        if (fieldInfos != null) {
            for (FieldInfo fi : fieldInfos) {
                String name = fi.name;
                IndexableField[] fs = doc.getFields(name);
                if (fs != null && fs.length > 0) {
                    for (IndexableField f : fs) {
                        IndexableFieldToXContent x = new IndexableFieldToXContent().field(f);
                        x.toXContent(builder, ToXContent.EMPTY_PARAMS);
                    }
                }
            }
        }
        builder.endArray();
        builder.startArray("terms");
        if (fieldInfos != null) {
            // Reuse the enums across fields/terms where the codec allows it.
            TermsEnum te = null;
            DocsAndPositionsEnum dpe = null;
            for (FieldInfo fi : fieldInfos) {
                Terms terms = MultiFields.getTerms(reader, fi.name);
                if (terms == null) {
                    // no terms in this field
                    continue;
                }
                te = terms.iterator(te);
                while (te.next() != null) {
                    // BUGFIX: request offsets explicitly — with flags 0 the
                    // enum is not required to expose them and startOffset()/
                    // endOffset() below would report -1 even when offsets
                    // were indexed.
                    DocsAndPositionsEnum newDpe = te.docsAndPositions(live, dpe, DocsAndPositionsEnum.FLAG_OFFSETS);
                    if (newDpe == null) {
                        // no position info for this field
                        break;
                    }
                    dpe = newDpe;
                    int num = dpe.advance(docNum);
                    if (num != docNum) {
                        // no data for this term in this doc
                        continue;
                    }
                    String text = te.term().utf8ToString();
                    List<Integer> positions = new ArrayList<Integer>();
                    List<Integer> starts = new ArrayList<Integer>();
                    List<Integer> ends = new ArrayList<Integer>();
                    for (int k = 0; k < dpe.freq(); k++) {
                        int pos = dpe.nextPosition();
                        positions.add(pos);
                        // -1 when offsets were not indexed for this field.
                        starts.add(dpe.startOffset());
                        ends.add(dpe.endOffset());
                    }
                    builder.startObject().field("text", text).field("positions", positions).field("starts", starts).field("ends", ends).field("count", dpe.freq()).endObject();
                }
            }
        }
        builder.endArray();
        builder.endObject();
    }
    builder.endArray();
    builder.endObject();
    return builder;
}
Also used : Terms(org.apache.lucene.index.Terms) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) TermsEnum(org.apache.lucene.index.TermsEnum) FieldInfos(org.apache.lucene.index.FieldInfos) IndexableField(org.apache.lucene.index.IndexableField) IndexableFieldToXContent(org.xbib.elasticsearch.action.skywalker.support.IndexableFieldToXContent) DocsAndPositionsEnum(org.apache.lucene.index.DocsAndPositionsEnum) Bits(org.apache.lucene.util.Bits) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 23 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

The example below is taken from the class TestInPlaceUpdatesStandalone, method testUpdateOfNonExistentDVsShouldNotFail.

@Test
public void testUpdateOfNonExistentDVsShouldNotFail() throws Exception {
    // Schema sanity check: the field nonexistent_field_i_dvo must not be
    // present in the index before this test runs.
    FieldInfo nonexistentFieldInfo;
    RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
    try {
        nonexistentFieldInfo = searcherRef.get().getSlowAtomicReader().getFieldInfos().fieldInfo("nonexistent_field_i_dvo");
    } finally {
        searcherRef.decref();
    }
    assertNull(nonexistentFieldInfo);
    // Apply in-place (partial) updates against the not-yet-existing docValues field.
    addAndGetVersion(sdoc("id", "0", "nonexistent_field_i_dvo", map("set", "42")), null);
    addAndGetVersion(sdoc("id", "1"), null);
    addAndGetVersion(sdoc("id", "1", "nonexistent_field_i_dvo", map("inc", "1")), null);
    addAndGetVersion(sdoc("id", "1", "nonexistent_field_i_dvo", map("inc", "1")), null);
    assertU(commit());
    // Both documents must be searchable with the expected docValues contents.
    assertQ(req("q", "*:*"), "//*[@numFound='2']");
    assertQ(req("q", "nonexistent_field_i_dvo:42"), "//*[@numFound='1']");
    assertQ(req("q", "nonexistent_field_i_dvo:2"), "//*[@numFound='1']");
}
Also used : SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) FieldInfo(org.apache.lucene.index.FieldInfo) Test(org.junit.Test)

Example 24 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

The example below is taken from the class TestUninvertingReader, method testFieldInfos.

public void testFieldInfos() throws IOException {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));
    // One document covering every field flavor of interest.
    Document document = new Document();
    BytesRef idBytes = new BytesRef("id");
    document.add(new StringField("id", idBytes, Store.YES));
    document.add(new LegacyIntField("int", 5, Store.YES));
    document.add(new NumericDocValuesField("dv", 5));
    document.add(new IntPoint("dint", 5));
    // stored only, not indexed
    document.add(new StoredField("stored", 5));
    writer.addDocument(document);
    writer.forceMerge(1);
    writer.close();
    Map<String, Type> mappings = new HashMap<>();
    mappings.put("int", Type.LEGACY_INTEGER);
    mappings.put("dv", Type.LEGACY_INTEGER);
    mappings.put("dint", Type.INTEGER_POINT);
    DirectoryReader wrapped = UninvertingReader.wrap(DirectoryReader.open(directory), mappings);
    LeafReader leaf = wrapped.leaves().get(0).reader();
    // Uninverted legacy numeric field: reported as NUMERIC doc values, no points.
    FieldInfo intInfo = leaf.getFieldInfos().fieldInfo("int");
    assertEquals(DocValuesType.NUMERIC, intInfo.getDocValuesType());
    assertEquals(0, intInfo.getPointDimensionCount());
    assertEquals(0, intInfo.getPointNumBytes());
    // Uninverted point field keeps its point dimensions alongside the doc values.
    FieldInfo dintInfo = leaf.getFieldInfos().fieldInfo("dint");
    assertEquals(DocValuesType.NUMERIC, dintInfo.getDocValuesType());
    assertEquals(1, dintInfo.getPointDimensionCount());
    assertEquals(4, dintInfo.getPointNumBytes());
    // A genuine doc values field passes through unchanged.
    FieldInfo dvInfo = leaf.getFieldInfos().fieldInfo("dv");
    assertEquals(DocValuesType.NUMERIC, dvInfo.getDocValuesType());
    // A stored-only field gains no doc values from the wrapper.
    FieldInfo storedInfo = leaf.getFieldInfos().fieldInfo("stored");
    assertEquals(DocValuesType.NONE, storedInfo.getDocValuesType());
    TestUtil.checkReader(wrapped);
    wrapped.close();
    directory.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) Type(org.apache.solr.uninverting.UninvertingReader.Type) LegacyFieldType(org.apache.solr.legacy.LegacyFieldType) DocValuesType(org.apache.lucene.index.DocValuesType) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) FieldInfo(org.apache.lucene.index.FieldInfo) Directory(org.apache.lucene.store.Directory) LegacyIntField(org.apache.solr.legacy.LegacyIntField)

Example 25 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

The example below is taken from the class FSTTermsWriter, method write.

@Override
public void write(Fields fields) throws IOException {
    // Write each field's term dictionary into the FST-based format.
    for (String field : fields) {
        Terms fieldTerms = fields.terms(field);
        if (fieldTerms == null) {
            continue;
        }
        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
        // Frequencies exist only when the field indexes DOCS_AND_FREQS or richer.
        boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
        TermsEnum termsEnum = fieldTerms.iterator();
        TermsWriter termsWriter = new TermsWriter(fieldInfo);
        long sumTotalTermFreq = 0;
        long sumDocFreq = 0;
        FixedBitSet docsSeen = new FixedBitSet(maxDoc);
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            // A null state means the postings writer skipped this term entirely.
            BlockTermState state = postingsWriter.writeTerm(term, termsEnum, docsSeen);
            if (state != null) {
                termsWriter.finishTerm(term, state);
                sumTotalTermFreq += state.totalTermFreq;
                sumDocFreq += state.docFreq;
            }
        }
        // -1 signals "no total term frequency" for fields without freqs.
        termsWriter.finish(hasFreq ? sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality());
    }
}
Also used : BlockTermState(org.apache.lucene.codecs.BlockTermState) FixedBitSet(org.apache.lucene.util.FixedBitSet) Terms(org.apache.lucene.index.Terms) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo)53 BytesRef (org.apache.lucene.util.BytesRef)13 LeafReader (org.apache.lucene.index.LeafReader)12 ArrayList (java.util.ArrayList)10 Terms (org.apache.lucene.index.Terms)9 TermsEnum (org.apache.lucene.index.TermsEnum)9 IOException (java.io.IOException)8 FieldInfos (org.apache.lucene.index.FieldInfos)8 HashMap (java.util.HashMap)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)7 DocValuesType (org.apache.lucene.index.DocValuesType)6 PointValues (org.apache.lucene.index.PointValues)6 IndexOutput (org.apache.lucene.store.IndexOutput)6 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)5 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)5 StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor)5 Map (java.util.Map)4 Document (org.apache.lucene.document.Document)4 EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer)4 IndexReader (org.apache.lucene.index.IndexReader)4