Search in sources :

Example 1 with TermStats

use of org.xbib.elasticsearch.skywalker.stats.TermStats in project elasticsearch-skywalker by jprante.

In the class Skywalker, the method getHighFreqTerms:

/**
 * Collects the highest-document-frequency terms of the index into a bounded
 * priority queue and returns them ordered by descending document frequency.
 *
 * @param numTerms   maximum number of terms to return (queue capacity)
 * @param fieldNames fields to scan; if {@code null}, every field of the reader is scanned
 * @return the top terms, highest docFreq first; {@code EMPTY_STATS} if the reader has no fields
 */
public TermStats[] getHighFreqTerms(int numTerms, String[] fieldNames) {
    TermStatsQueue tiq = new TermStatsQueue(numTerms);
    // reused across fields so terms.iterator(te) can recycle the enum
    TermsEnum te = null;
    try {
        if (fieldNames != null) {
            Fields fields = MultiFields.getFields(reader);
            if (fields == null) {
                return EMPTY_STATS;
            }
            for (String field : fieldNames) {
                Terms terms = fields.terms(field);
                if (terms != null) {
                    te = terms.iterator(te);
                    fillQueue(te, tiq, field);
                }
            }
        } else {
            Fields fields = MultiFields.getFields(reader);
            if (fields == null) {
                return EMPTY_STATS;
            }
            for (String field : fields) {
                Terms terms = fields.terms(field);
                // terms() may return null for a field with no inverted terms;
                // without this check the unnamed-fields path could NPE
                if (terms != null) {
                    te = terms.iterator(te);
                    fillQueue(te, tiq, field);
                }
            }
        }
    } catch (IOException ignored) {
        // best effort: return whatever was collected before the I/O failure
    }
    // The queue pops lowest-frequency entries first, so fill the result array
    // back-to-front to end up with the highest frequency at index 0.
    TermStats[] result = new TermStats[tiq.size()];
    int count = tiq.size() - 1;
    while (tiq.size() != 0) {
        result[count] = tiq.pop();
        count--;
    }
    return result;
}
Also used : TermStats(org.xbib.elasticsearch.skywalker.stats.TermStats) IOException(java.io.IOException) TermStatsQueue(org.xbib.elasticsearch.skywalker.stats.TermStatsQueue)

Example 2 with TermStats

use of org.xbib.elasticsearch.skywalker.stats.TermStats in project elasticsearch-skywalker by jprante.

In the class Skywalker, the method fillQueue:

/**
 * Drains every term from the given enumeration into the queue, recording the
 * field name, a copy of the term bytes, and the document frequency for each.
 * Stops silently at the first I/O failure, keeping whatever was queued so far.
 */
private void fillQueue(TermsEnum termsEnum, TermStatsQueue tiq, String field) {
    try {
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            // copy the bytes: the enum may reuse its BytesRef on the next() call
            BytesRef termCopy = new BytesRef();
            termCopy.copyBytes(term);
            TermStats stats = new TermStats();
            stats.field(field).text(termCopy).docFreq(termsEnum.docFreq());
            tiq.insertWithOverflow(stats);
        }
    } catch (IOException ignored) {
        // best effort: an I/O error simply ends the fill, as the caller expects
    }
}
Also used : TermStats(org.xbib.elasticsearch.skywalker.stats.TermStats) IOException(java.io.IOException) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with TermStats

use of org.xbib.elasticsearch.skywalker.stats.TermStats in project elasticsearch-skywalker by jprante.

In the class TransportSkywalkerAction, the method shardOperation:

/**
 * Gathers low-level Lucene diagnostics for one shard — index files, store
 * metadata, doc counts, segment/commit details, field infos, and the top 50
 * terms — and wraps them in a {@link ShardSkywalkerResponse}.
 *
 * @param request identifies the index and shard to inspect
 * @return the populated shard response
 * @throws ElasticsearchException wrapping any failure during inspection
 */
@Override
protected ShardSkywalkerResponse shardOperation(ShardSkywalkerRequest request) throws ElasticsearchException {
    synchronized (mutex) {
        IndexService indexService = indicesService.indexServiceSafe(request.index());
        InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
        MapperService mapperService = indexService.mapperService();
        Engine.Searcher searcher = indexShard.acquireSearcher("skywalker_action");
        try {
            IndexReader reader = searcher.reader();
            Skywalker skywalker = new Skywalker(reader);
            Map<String, Object> response = new HashMap<String, Object>();
            Directory directory = indexShard.store().directory();
            List<Map<String, Object>> indexFiles = new ArrayList<Map<String, Object>>();
            for (String f : skywalker.getIndexFiles(directory)) {
                Map<String, Object> indexFile = new HashMap<String, Object>();
                indexFile.put("name", f);
                indexFile.put("function", skywalker.getFileFunction(f));
                indexFiles.add(indexFile);
            }
            response.put("indexFiles", indexFiles);
            skywalker.getStoreMetadata(response, indexShard.store().getMetadata());
            response.put("indexVersion", skywalker.getVersion());
            response.put("directoryImpl", skywalker.getDirImpl());
            response.put("numDocs", reader.numDocs());
            response.put("maxDoc", reader.maxDoc());
            response.put("hasDeletions", reader.hasDeletions());
            response.put("numDeletedDocs", reader.numDeletedDocs());
            // NOTE(review): ftc itself is unused; getFieldTermCounts() presumably
            // computes the totals that getNumTerms() reads below as a side effect —
            // confirm against Skywalker before removing this call.
            Set<FieldTermCount> ftc = skywalker.getFieldTermCounts();
            response.put("numTerms", skywalker.getNumTerms());
            Map<String, Object> indexFormatInfo = new HashMap<String, Object>();
            FormatDetails details = skywalker.getFormatDetails();
            indexFormatInfo.put("version", details.getVersion());
            indexFormatInfo.put("genericName", details.getGenericName());
            indexFormatInfo.put("capabilities", details.getCapabilities());
            response.put("indexFormat", indexFormatInfo);
            List<Map<String, Object>> commits = new ArrayList<Map<String, Object>>();
            Iterator<Segment> it = indexShard.engine().segments().iterator();
            while (it.hasNext()) {
                Segment segment = it.next();
                Map<String, Object> m = new HashMap<String, Object>();
                m.put("segment", segment.getName());
                m.put("count", segment.getNumDocs());
                m.put("deleted", segment.getDeletedDocs());
                m.put("generation", segment.getGeneration());
                m.put("sizeInBytes", segment.getSizeInBytes());
                m.put("version", segment.getVersion());
                m.put("committed", segment.committed);
                m.put("compound", segment.compound);
                m.put("size", segment.getSize().toString());
                commits.add(m);
            }
            response.put("commits", commits);
            List<Object> fieldInfos = new ArrayList<Object>();
            for (FieldInfo fi : MultiFields.getMergedFieldInfos(reader)) {
                fieldInfos.add(skywalker.getFieldInfo(mapperService, fi));
            }
            response.put("fieldInfos", fieldInfos);
            List<Map<String, Object>> termList = new ArrayList<Map<String, Object>>();
            for (TermStats ts : skywalker.getTopTerms(50)) {
                Map<String, Object> m = new HashMap<String, Object>();
                m.put("field", ts.field());
                m.put("text", ts.text());
                m.put("docFreq", ts.docFreq());
                termList.add(m);
            }
            response.put("topterms", termList);
            return new ShardSkywalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (Exception ex) {
            // preserve the original cause when re-wrapping
            throw new ElasticsearchException(ex.getMessage(), ex);
        } finally {
            // acquireSearcher() pins index readers; the original leaked it on
            // every path — always release, even when inspection fails
            searcher.release();
        }
    }
}
Also used : IndexService(org.elasticsearch.index.service.IndexService) Lists.newArrayList(org.elasticsearch.common.collect.Lists.newArrayList) ElasticsearchException(org.elasticsearch.ElasticsearchException) Skywalker(org.xbib.elasticsearch.skywalker.Skywalker) Segment(org.elasticsearch.index.engine.Segment) FieldTermCount(org.xbib.elasticsearch.skywalker.stats.FieldTermCount) FormatDetails(org.xbib.elasticsearch.skywalker.FormatDetails) Lists.newArrayList(org.elasticsearch.common.collect.Lists.newArrayList) Engine(org.elasticsearch.index.engine.Engine) Directory(org.apache.lucene.store.Directory) TermStats(org.xbib.elasticsearch.skywalker.stats.TermStats) InternalIndexShard(org.elasticsearch.index.shard.service.InternalIndexShard) ElasticsearchException(org.elasticsearch.ElasticsearchException) ClusterBlockException(org.elasticsearch.cluster.block.ClusterBlockException) BroadcastShardOperationFailedException(org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException) ShardOperationFailedException(org.elasticsearch.action.ShardOperationFailedException) DefaultShardOperationFailedException(org.elasticsearch.action.support.DefaultShardOperationFailedException) IndexReader(org.apache.lucene.index.IndexReader) MapperService(org.elasticsearch.index.mapper.MapperService) FieldInfo(org.apache.lucene.index.FieldInfo)

Aggregations

TermStats (org.xbib.elasticsearch.skywalker.stats.TermStats)3 IOException (java.io.IOException)2 FieldInfo (org.apache.lucene.index.FieldInfo)1 IndexReader (org.apache.lucene.index.IndexReader)1 Directory (org.apache.lucene.store.Directory)1 BytesRef (org.apache.lucene.util.BytesRef)1 ElasticsearchException (org.elasticsearch.ElasticsearchException)1 ShardOperationFailedException (org.elasticsearch.action.ShardOperationFailedException)1 DefaultShardOperationFailedException (org.elasticsearch.action.support.DefaultShardOperationFailedException)1 BroadcastShardOperationFailedException (org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException)1 ClusterBlockException (org.elasticsearch.cluster.block.ClusterBlockException)1 Lists.newArrayList (org.elasticsearch.common.collect.Lists.newArrayList)1 Engine (org.elasticsearch.index.engine.Engine)1 Segment (org.elasticsearch.index.engine.Segment)1 MapperService (org.elasticsearch.index.mapper.MapperService)1 IndexService (org.elasticsearch.index.service.IndexService)1 InternalIndexShard (org.elasticsearch.index.shard.service.InternalIndexShard)1 FormatDetails (org.xbib.elasticsearch.skywalker.FormatDetails)1 Skywalker (org.xbib.elasticsearch.skywalker.Skywalker)1 FieldTermCount (org.xbib.elasticsearch.skywalker.stats.FieldTermCount)1