use of com.carrotsearch.hppc.LongHashSet in project elasticsearch by elastic.
the class HistogramIT method testSingleValuedFieldOrderedByCountDesc.
public void testSingleValuedFieldOrderedByCountDesc() throws Exception {
SearchResponse response = client().prepareSearch("idx").addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).order(Histogram.Order.COUNT_DESC)).execute().actionGet();
assertSearchResponse(response);
Histogram histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
assertThat(histo.getBuckets().size(), equalTo(numValueBuckets));
LongHashSet buckets = new LongHashSet();
// TODO: use diamond once JI-9019884 is fixed
List<Histogram.Bucket> histoBuckets = new ArrayList<>(histo.getBuckets());
long previousCount = Long.MAX_VALUE;
for (int i = 0; i < numValueBuckets; ++i) {
Histogram.Bucket bucket = histoBuckets.get(i);
assertThat(bucket, notNullValue());
long key = ((Number) bucket.getKey()).longValue();
assertEquals(0, key % interval);
assertTrue(buckets.add(key));
assertThat(bucket.getDocCount(), equalTo(valueCounts[(int) (key / interval)]));
assertThat(bucket.getDocCount(), lessThanOrEqualTo(previousCount));
previousCount = bucket.getDocCount();
}
}
use of com.carrotsearch.hppc.LongHashSet in project elasticsearch by elastic.
the class BinaryRangeAggregatorTests method doTestSortedBinaryRangeLeafCollector.
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
final Set<BytesRef> termSet = new HashSet<>();
final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
while (termSet.size() < numTerms) {
termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
Arrays.sort(terms);
final int numRanges = randomIntBetween(1, 10);
BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
for (int i = 0; i < numRanges; ++i) {
ranges[i] = new BinaryRangeAggregator.Range(Integer.toString(i), randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))), randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
}
Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
final int[] counts = new int[ranges.length];
SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {
@Override
protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
counts[(int) bucket]++;
}
};
final int[] expectedCounts = new int[ranges.length];
final int maxDoc = randomIntBetween(5, 10);
for (int doc = 0; doc < maxDoc; ++doc) {
LongHashSet ordinalSet = new LongHashSet();
final int numValues = randomInt(maxNumValuesPerDoc);
while (ordinalSet.size() < numValues) {
ordinalSet.add(random().nextInt(terms.length));
}
final long[] ords = ordinalSet.toArray();
Arrays.sort(ords);
values.ords = ords;
// simulate aggregation
collector.collect(doc);
// now do it the naive way
for (int i = 0; i < ranges.length; ++i) {
for (long ord : ords) {
BytesRef term = terms[(int) ord];
if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0) && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
expectedCounts[i]++;
break;
}
}
}
}
assertArrayEquals(expectedCounts, counts);
}
use of com.carrotsearch.hppc.LongHashSet in project lucene-solr by apache.
the class ExpandComponent method process.
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doExpand) {
return;
}
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String field = params.get(ExpandParams.EXPAND_FIELD);
String hint = null;
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
field = cp.getField();
hint = cp.hint;
}
}
}
}
if (field == null) {
throw new IOException("Expand field is null.");
}
String sortParam = params.get(ExpandParams.EXPAND_SORT);
String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
String qs = params.get(ExpandParams.EXPAND_Q);
int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
Sort sort = null;
if (sortParam != null) {
sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
}
Query query;
if (qs == null) {
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
}
}
List<Query> newFilters = new ArrayList<>();
if (fqs == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
newFilters.add(q);
}
}
}
} else {
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
SolrIndexSearcher searcher = req.getSearcher();
LeafReader reader = searcher.getSlowAtomicReader();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
SortedDocValues values = null;
long nullValue = 0L;
if (fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
} else {
//Get the nullValue for the numeric collapse field
String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
final NumberType numType = fieldType.getNumberType();
// we don't need to handle invalid 64-bit field types here.
if (defaultValue != null) {
if (numType == NumberType.INTEGER) {
nullValue = Long.parseLong(defaultValue);
} else if (numType == NumberType.FLOAT) {
nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
}
} else if (NumberType.FLOAT.equals(numType)) {
// Integer case already handled by nullValue defaulting to 0
nullValue = Float.floatToIntBits(0.0f);
}
}
FixedBitSet groupBits = null;
LongHashSet groupSet = null;
DocList docList = rb.getResults().docList;
IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
//Gather the groups for the current page of documents
DocIterator idit = docList.iterator();
int[] globalDocs = new int[docList.size()];
int docsIndex = -1;
while (idit.hasNext()) {
globalDocs[++docsIndex] = idit.nextDoc();
}
Arrays.sort(globalDocs);
Query groupQuery = null;
/*
* This code gathers the group information for the current page.
*/
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
if (contexts.size() == 0) {
//When no context is available we can skip the expanding
return;
}
int currentContext = 0;
int currentDocBase = contexts.get(currentContext).docBase;
int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
IntObjectHashMap<BytesRef> ordBytes = null;
if (values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
if (values instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
int count = 0;
ordBytes = new IntObjectHashMap<>();
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
if (ordinalMap != null) {
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
}
int contextDoc = globalDoc - currentDocBase;
if (ordinalMap != null) {
if (contextDoc > currentValues.docID()) {
currentValues.advance(contextDoc);
}
if (contextDoc == currentValues.docID()) {
int ord = currentValues.ordValue();
++count;
BytesRef ref = currentValues.lookupOrd(ord);
ord = (int) segmentOrdinalMap.get(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
} else {
if (globalDoc > values.docID()) {
values.advance(globalDoc);
}
if (globalDoc == values.docID()) {
int ord = values.ordValue();
++count;
BytesRef ref = values.lookupOrd(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
}
}
if (count > 0 && count < 200) {
try {
groupQuery = getGroupQuery(field, count, ordBytes);
} catch (Exception e) {
throw new IOException(e);
}
}
} else {
groupSet = new LongHashSet(docList.size());
NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
int count = 0;
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
}
int contextDoc = globalDoc - currentDocBase;
int valueDocID = collapseValues.docID();
if (valueDocID < contextDoc) {
valueDocID = collapseValues.advance(contextDoc);
}
long value;
if (valueDocID == contextDoc) {
value = collapseValues.longValue();
} else {
value = 0;
}
if (value != nullValue) {
++count;
groupSet.add(value);
collapsedSet.add(globalDoc);
}
}
if (count > 0 && count < 200) {
if (fieldType.isPointField()) {
groupQuery = getPointGroupQuery(schemaField, count, groupSet);
} else {
groupQuery = getGroupQuery(field, fieldType, count, groupSet);
}
}
}
Collector collector;
if (sort != null)
sort = sort.rewrite(searcher);
Collector groupExpandCollector = null;
if (values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
} else {
groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
}
if (groupQuery != null) {
//Limits the results to documents that are in the same group as the documents in the page.
newFilters.add(groupQuery);
}
SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
if (pfilter.postFilter != null) {
pfilter.postFilter.setLastDelegate(groupExpandCollector);
collector = pfilter.postFilter;
} else {
collector = groupExpandCollector;
}
if (pfilter.filter == null) {
searcher.search(query, collector);
} else {
Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
searcher.search(q, collector);
}
LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
NamedList outMap = new SimpleOrderedMap();
CharsRefBuilder charsRef = new CharsRefBuilder();
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
if (fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int) groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
}
rb.rsp.add("expanded", outMap);
}
use of com.carrotsearch.hppc.LongHashSet in project lucene-solr by apache.
the class ExplicitHLLTest method assertElementsEqual.
// ************************************************************************
// assertion helpers
/**
* Asserts that values in both sets are exactly equal.
*/
private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
final LongHashSet internalSetA = hllA.explicitStorage;
final LongHashSet internalSetB = hllB.explicitStorage;
assertTrue(internalSetA.equals(internalSetB));
}
use of com.carrotsearch.hppc.LongHashSet in project titan by thinkaurelius.
the class IDAuthorityTest method testSimpleIDAcquisition.
@Test
public void testSimpleIDAcquisition() throws BackendException {
final IDBlockSizer blockSizer = new InnerIDBlockSizer();
idAuthorities[0].setIDBlockSizer(blockSizer);
int numTrials = 100;
LongSet ids = new LongHashSet((int) blockSize * numTrials);
long previous = 0;
for (int i = 0; i < numTrials; i++) {
IDBlock block = idAuthorities[0].getIDBlock(0, 0, GET_ID_BLOCK_TIMEOUT);
checkBlock(block, ids);
if (hasEmptyUid) {
if (previous != 0)
assertEquals(previous + 1, block.getId(0));
previous = block.getId(block.numIds() - 1);
}
}
}
Aggregations