Example 1 with LongHashSet

Use of com.carrotsearch.hppc.LongHashSet in project elasticsearch by elastic.

From class HistogramIT, method testSingleValuedFieldOrderedByCountDesc:

public void testSingleValuedFieldOrderedByCountDesc() throws Exception {
    SearchResponse response = client().prepareSearch("idx")
            .addAggregation(histogram("histo")
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .interval(interval)
                    .order(Histogram.Order.COUNT_DESC))
            .execute().actionGet();
    assertSearchResponse(response);
    Histogram histo = response.getAggregations().get("histo");
    assertThat(histo, notNullValue());
    assertThat(histo.getName(), equalTo("histo"));
    assertThat(histo.getBuckets().size(), equalTo(numValueBuckets));
    LongHashSet buckets = new LongHashSet();
    List<Histogram.Bucket> histoBuckets = new ArrayList<>(histo.getBuckets());
    long previousCount = Long.MAX_VALUE;
    for (int i = 0; i < numValueBuckets; ++i) {
        Histogram.Bucket bucket = histoBuckets.get(i);
        assertThat(bucket, notNullValue());
        long key = ((Number) bucket.getKey()).longValue();
        assertEquals(0, key % interval);
        assertTrue(buckets.add(key));
        assertThat(bucket.getDocCount(), equalTo(valueCounts[(int) (key / interval)]));
        assertThat(bucket.getDocCount(), lessThanOrEqualTo(previousCount));
        previousCount = bucket.getDocCount();
    }
}
Also used: LongHashSet (com.carrotsearch.hppc.LongHashSet), Histogram (org.elasticsearch.search.aggregations.bucket.histogram.Histogram), Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket), ArrayList (java.util.ArrayList), SearchResponse (org.elasticsearch.action.search.SearchResponse), ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)
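The assertTrue(buckets.add(key)) idiom in this test works because HPPC's LongHashSet.add returns true only when the key was not already present. A minimal standalone sketch of that dedupe-and-assert pattern (the demo class name is hypothetical, not part of the test above):

import com.carrotsearch.hppc.LongHashSet;

// Hypothetical demo class illustrating the uniqueness check used in the test.
public class LongHashSetAddDemo {
    public static void main(String[] args) {
        LongHashSet buckets = new LongHashSet();
        long[] keys = {0L, 5L, 10L, 5L};
        for (long key : keys) {
            // add(long) returns false when the key is already present,
            // which is what lets assertTrue(buckets.add(key)) catch duplicate bucket keys.
            System.out.println(key + " added: " + buckets.add(key));
        }
        System.out.println("distinct keys: " + buckets.size()); // prints 3
    }
}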

Example 2 with LongHashSet

Use of com.carrotsearch.hppc.LongHashSet in project elasticsearch by elastic.

From class BinaryRangeAggregatorTests, method doTestSortedBinaryRangeLeafCollector:

private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        ranges[i] = new BinaryRangeAggregator.Range(
                Integer.toString(i),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
                randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {

        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // simulate aggregation
        collector.collect(doc);
        // now do it the naive way
        for (int i = 0; i < ranges.length; ++i) {
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0) && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
                    expectedCounts[i]++;
                    break;
                }
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
Also used: SortedBinaryRangeLeafCollector (org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedBinaryRangeLeafCollector), LongHashSet (com.carrotsearch.hppc.LongHashSet), LeafBucketCollector (org.elasticsearch.search.aggregations.LeafBucketCollector), BytesRef (org.apache.lucene.util.BytesRef), HashSet (java.util.HashSet)
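Here LongHashSet serves as a scratch set for drawing distinct random ordinals, and toArray() hands back a primitive long[] that can be sorted without boxing. A condensed sketch of that pattern (class name and sizes are illustrative):

import java.util.Arrays;
import java.util.Random;

import com.carrotsearch.hppc.LongHashSet;

// Hypothetical demo class; numTerms and numValues are illustrative.
public class DistinctOrdinalsDemo {
    public static void main(String[] args) {
        Random random = new Random(42);
        int numTerms = 10;  // size of the simulated term dictionary
        int numValues = 4;  // distinct ordinals wanted for one document
        LongHashSet ordinalSet = new LongHashSet();
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random.nextInt(numTerms)); // duplicates are silently ignored
        }
        long[] ords = ordinalSet.toArray(); // primitive long[], no boxing
        Arrays.sort(ords);                  // doc values iterate ordinals in sorted order
        System.out.println(Arrays.toString(ords));
    }
}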

Example 3 with LongHashSet

Use of com.carrotsearch.hppc.LongHashSet in project lucene-solr by apache.

From class ExpandComponent, method process:

@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get The Top Level SortedDocValues
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // The collapse component, which expand depends on, validates that numeric
        // collapse fields are 32-bit, so 64-bit field types never reach this code.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
            }
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
        }
    }
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    }
    Arrays.sort(globalDocs);
    Query groupQuery = null;
    /*
     * This code gathers the group information for the current page.
     */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip the expanding
        return;
    }
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        }
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
                }
            }
            int contextDoc = globalDoc - currentDocBase;
            if (ordinalMap != null) {
                if (contextDoc > currentValues.docID()) {
                    currentValues.advance(contextDoc);
                }
                if (contextDoc == currentValues.docID()) {
                    int ord = currentValues.ordValue();
                    ++count;
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            } else {
                if (globalDoc > values.docID()) {
                    values.advance(globalDoc);
                }
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    ++count;
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            }
        }
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    } else {
        groupSet = new LongHashSet(docList.size());
        NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
            }
            int contextDoc = globalDoc - currentDocBase;
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            }
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            }
            if (value != nullValue) {
                ++count;
                groupSet.add(value);
                collapsedSet.add(globalDoc);
            }
        }
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
            }
        }
    }
    Collector collector;
    if (sort != null) {
        sort = sort.rewrite(searcher);
    }
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get The Top Level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    }
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
        newFilters.add(groupQuery);
    }
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    if (pfilter.filter == null) {
        searcher.search(query, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
        searcher.search(q, collector);
    }
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList outMap = new SimpleOrderedMap();
    CharsRefBuilder charsRef = new CharsRefBuilder();
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            if (fieldType instanceof StrField) {
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
            }
        }
    }
    rb.rsp.add("expanded", outMap);
}
Also used: StrField (org.apache.solr.schema.StrField), BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), TermInSetQuery (org.apache.lucene.search.TermInSetQuery), SolrConstantScoreQuery (org.apache.solr.search.SolrConstantScoreQuery), HashMap (java.util.HashMap), LongObjectHashMap (com.carrotsearch.hppc.LongObjectHashMap), IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap), ArrayList (java.util.ArrayList), IntHashSet (com.carrotsearch.hppc.IntHashSet), MultiDocValues (org.apache.lucene.index.MultiDocValues), DocSlice (org.apache.solr.search.DocSlice), ScoreDoc (org.apache.lucene.search.ScoreDoc), FixedBitSet (org.apache.lucene.util.FixedBitSet), Sort (org.apache.lucene.search.Sort), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder), SortedDocValues (org.apache.lucene.index.SortedDocValues), LongHashSet (com.carrotsearch.hppc.LongHashSet), SolrQueryRequest (org.apache.solr.request.SolrQueryRequest), NumberType (org.apache.solr.schema.NumberType), QParser (org.apache.solr.search.QParser), SolrParams (org.apache.solr.common.params.SolrParams), NumericDocValues (org.apache.lucene.index.NumericDocValues), DocIterator (org.apache.solr.search.DocIterator), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), UninvertingReader (org.apache.solr.uninverting.UninvertingReader), TopDocs (org.apache.lucene.search.TopDocs), TopFieldCollector (org.apache.lucene.search.TopFieldCollector), LeafCollector (org.apache.lucene.search.LeafCollector), Collector (org.apache.lucene.search.Collector), TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector), TopDocsCollector (org.apache.lucene.search.TopDocsCollector), BytesRef (org.apache.lucene.util.BytesRef), LeafReader (org.apache.lucene.index.LeafReader), FilterLeafReader (org.apache.lucene.index.FilterLeafReader), NamedList (org.apache.solr.common.util.NamedList), IOException (java.io.IOException), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), FieldType (org.apache.solr.schema.FieldType), CollapsingQParserPlugin (org.apache.solr.search.CollapsingQParserPlugin), SchemaField (org.apache.solr.schema.SchemaField), DocValuesType (org.apache.lucene.index.DocValuesType), LongValues (org.apache.lucene.util.LongValues), DocList (org.apache.solr.search.DocList)
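In this example LongHashSet acts as a primitive-long membership set: distinct numeric collapse values from the current page are gathered with add, and the expand collector later tests candidate documents against that set. A reduced sketch of the gather-then-test pattern (class name, values, and the nullValue figure are illustrative):

import com.carrotsearch.hppc.LongHashSet;

// Hypothetical demo class condensing the numeric-group gathering above.
public class GroupSetDemo {
    public static void main(String[] args) {
        long nullValue = 0L; // docs carrying the null value never define a group
        long[] pageValues = {42L, 7L, 0L, 42L}; // collapse values on the current page
        LongHashSet groupSet = new LongHashSet(pageValues.length);
        for (long value : pageValues) {
            if (value != nullValue) {
                groupSet.add(value); // set semantics collapse the duplicate 42
            }
        }
        // The expand collector keeps only docs whose group value is on the page.
        System.out.println("group 7 on page? " + groupSet.contains(7L));   // true
        System.out.println("group 99 on page? " + groupSet.contains(99L)); // false
    }
}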

Example 4 with LongHashSet

Use of com.carrotsearch.hppc.LongHashSet in project lucene-solr by apache.

From class ExplicitHLLTest, method assertElementsEqual:

// ************************************************************************
// assertion helpers
/**
 * Asserts that values in both sets are exactly equal.
 */
private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
    final LongHashSet internalSetA = hllA.explicitStorage;
    final LongHashSet internalSetB = hllB.explicitStorage;
    assertTrue(internalSetA.equals(internalSetB));
}
Also used: LongHashSet (com.carrotsearch.hppc.LongHashSet)
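This assertion relies on HPPC hash sets comparing by contents: two LongHashSet instances are equal when they hold the same keys, regardless of insertion order or initial capacity. A quick illustration (the demo class is hypothetical):

import com.carrotsearch.hppc.LongHashSet;

// Hypothetical demo class showing value-based equality of HPPC sets.
public class SetEqualityDemo {
    public static void main(String[] args) {
        LongHashSet a = new LongHashSet();
        a.add(1L); a.add(2L); a.add(3L);
        LongHashSet b = new LongHashSet(64); // different initial capacity
        b.add(3L); b.add(1L); b.add(2L);     // different insertion order
        System.out.println(a.equals(b)); // true: equality depends on contents only
    }
}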

Example 5 with LongHashSet

Use of com.carrotsearch.hppc.LongHashSet in project titan by thinkaurelius.

From class IDAuthorityTest, method testSimpleIDAcquisition:

@Test
public void testSimpleIDAcquisition() throws BackendException {
    final IDBlockSizer blockSizer = new InnerIDBlockSizer();
    idAuthorities[0].setIDBlockSizer(blockSizer);
    int numTrials = 100;
    LongSet ids = new LongHashSet((int) blockSize * numTrials);
    long previous = 0;
    for (int i = 0; i < numTrials; i++) {
        IDBlock block = idAuthorities[0].getIDBlock(0, 0, GET_ID_BLOCK_TIMEOUT);
        checkBlock(block, ids);
        if (hasEmptyUid) {
            if (previous != 0) {
                assertEquals(previous + 1, block.getId(0));
            }
            previous = block.getId(block.numIds() - 1);
        }
    }
}
Also used: LongHashSet (com.carrotsearch.hppc.LongHashSet), IDBlockSizer (com.thinkaurelius.titan.graphdb.database.idassigner.IDBlockSizer), LongSet (com.carrotsearch.hppc.LongSet), Test (org.junit.Test)
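The constructor argument pre-sizes the set for roughly blockSize * numTrials expected elements, so the set does not rehash as IDs accumulate, and add doubles as a uniqueness check. A minimal sketch of that pattern (class name and sizes are illustrative):

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongSet;

// Hypothetical demo class; blockSize and numTrials mirror the test's sizing math.
public class PresizedIdSetDemo {
    public static void main(String[] args) {
        int blockSize = 64;   // illustrative
        int numTrials = 100;  // illustrative
        // Pre-sizing for the expected element count avoids rehashing while the set grows.
        LongSet ids = new LongHashSet(blockSize * numTrials);
        for (long id = 0; id < blockSize * numTrials; id++) {
            if (!ids.add(id)) { // add returns false on duplicates
                throw new AssertionError("duplicate id: " + id);
            }
        }
        System.out.println("collected " + ids.size() + " unique ids");
    }
}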

Aggregations

LongHashSet (com.carrotsearch.hppc.LongHashSet): 30 usages
LongSet (com.carrotsearch.hppc.LongSet): 20 usages
ArrayList (java.util.ArrayList): 14 usages
Test (org.junit.Test): 7 usages
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 6 usages
ExecutorService (java.util.concurrent.ExecutorService): 6 usages
Future (java.util.concurrent.Future): 6 usages
SearchResponse (org.elasticsearch.action.search.SearchResponse): 5 usages
Histogram (org.elasticsearch.search.aggregations.bucket.histogram.Histogram): 5 usages
Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket): 5 usages
ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse): 5 usages
HashSet (java.util.HashSet): 4 usages
ExecutionException (java.util.concurrent.ExecutionException): 4 usages
ConsistentKeyIDAuthority (org.janusgraph.diskstorage.idmanagement.ConsistentKeyIDAuthority): 4 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 4 usages
MethodSource (org.junit.jupiter.params.provider.MethodSource): 4 usages
Callable (java.util.concurrent.Callable): 3 usages
BytesRef (org.apache.lucene.util.BytesRef): 3 usages
JanusGraph (org.janusgraph.core.JanusGraph): 3 usages
JanusGraphVertex (org.janusgraph.core.JanusGraphVertex): 3 usages