use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class ExportWriter method writeDoc.
protected void writeDoc(SortDoc sortDoc, List<LeafReaderContext> leaves, EntryWriter ew) throws IOException {
int ord = sortDoc.ord;
FixedBitSet set = sets[ord];
set.clear(sortDoc.docId);
LeafReaderContext context = leaves.get(ord);
int fieldIndex = 0;
for (FieldWriter fieldWriter : fieldWriters) {
if (fieldWriter.write(sortDoc.docId, context.reader(), ew, fieldIndex)) {
++fieldIndex;
}
}
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SolrIndexSearcher method getDocSet.
/** @lucene.internal */
public DocSet getDocSet(DocsEnumState deState) throws IOException {
int largestPossible = deState.termsEnum.docFreq();
boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;
TermQuery key = null;
if (useCache) {
key = new TermQuery(new Term(deState.fieldName, deState.termsEnum.term()));
DocSet result = filterCache.get(key);
if (result != null)
return result;
}
int smallSetSize = DocSetUtil.smallSetSize(maxDoc());
int scratchSize = Math.min(smallSetSize, largestPossible);
if (deState.scratch == null || deState.scratch.length < scratchSize)
deState.scratch = new int[scratchSize];
final int[] docs = deState.scratch;
int upto = 0;
int bitsSet = 0;
FixedBitSet fbs = null;
PostingsEnum postingsEnum = deState.termsEnum.postings(deState.postingsEnum, PostingsEnum.NONE);
postingsEnum = BitsFilteredPostingsEnum.wrap(postingsEnum, deState.liveDocs);
if (deState.postingsEnum == null) {
deState.postingsEnum = postingsEnum;
}
if (postingsEnum instanceof MultiPostingsEnum) {
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
for (int subindex = 0; subindex < numSubs; subindex++) {
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
if (sub.postingsEnum == null)
continue;
int base = sub.slice.start;
int docid;
if (largestPossible > docs.length) {
if (fbs == null)
fbs = new FixedBitSet(maxDoc());
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
fbs.set(docid + base);
bitsSet++;
}
} else {
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid + base;
}
}
}
} else {
int docid;
if (largestPossible > docs.length) {
fbs = new FixedBitSet(maxDoc());
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
fbs.set(docid);
bitsSet++;
}
} else {
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid;
}
}
}
DocSet result;
if (fbs != null) {
for (int i = 0; i < upto; i++) {
fbs.set(docs[i]);
}
bitsSet += upto;
result = new BitDocSet(fbs, bitsSet);
} else {
result = upto == 0 ? DocSet.EMPTY : new SortedIntDocSet(Arrays.copyOf(docs, upto));
}
if (useCache) {
filterCache.put(key, result);
}
return result;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SolrIndexSearcher method makeBitDocSet.
private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
bs.set(iter.nextDoc());
}
return new BitDocSet(bs, answer.size());
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestJoinUtil method executeRandomJoin.
private void executeRandomJoin(boolean multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex) throws Exception {
for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++) {
if (VERBOSE) {
System.out.println("indexIter=" + indexIter);
}
IndexIterationContext context = createContext(numberOfDocumentsToIndex, multipleValuesPerDocument, false);
IndexSearcher indexSearcher = context.searcher;
for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++) {
if (VERBOSE) {
System.out.println("searchIter=" + searchIter);
}
int r = random().nextInt(context.randomUniqueValues.length);
boolean from = context.randomFrom[r];
String randomValue = context.randomUniqueValues[r];
BitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
final Query actualQuery = new TermQuery(new Term("value", randomValue));
if (VERBOSE) {
System.out.println("actualQuery=" + actualQuery);
}
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
if (VERBOSE) {
System.out.println("scoreMode=" + scoreMode);
}
final Query joinQuery;
{
// single val can be handled by multiple-vals
final boolean muliValsQuery = multipleValuesPerDocument || random().nextBoolean();
final String fromField = from ? "from" : "to";
final String toField = from ? "to" : "from";
int surpriseMe = random().nextInt(2);
switch(surpriseMe) {
case 0:
Class<? extends Number> numType;
String suffix;
if (random().nextBoolean()) {
numType = Integer.class;
suffix = "INT";
} else if (random().nextBoolean()) {
numType = Float.class;
suffix = "FLOAT";
} else if (random().nextBoolean()) {
numType = Long.class;
suffix = "LONG";
} else {
numType = Double.class;
suffix = "DOUBLE";
}
joinQuery = JoinUtil.createJoinQuery(fromField + suffix, muliValsQuery, toField + suffix, numType, actualQuery, indexSearcher, scoreMode);
break;
case 1:
joinQuery = JoinUtil.createJoinQuery(fromField, muliValsQuery, toField, actualQuery, indexSearcher, scoreMode);
break;
default:
throw new RuntimeException("unexpected value " + surpriseMe);
}
}
if (VERBOSE) {
System.out.println("joinQuery=" + joinQuery);
}
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
final BitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10);
indexSearcher.search(joinQuery, MultiCollector.wrap(new BitSetCollector(actualResult), topScoreDocCollector));
// Asserting bit set...
assertBitSet(expectedResult, actualResult, indexSearcher);
// Asserting TopDocs...
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
assertTopDocs(expectedTopDocs, actualTopDocs, scoreMode, indexSearcher, joinQuery);
}
context.close();
}
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class ExpandComponent method process.
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doExpand) {
return;
}
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String field = params.get(ExpandParams.EXPAND_FIELD);
String hint = null;
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
field = cp.getField();
hint = cp.hint;
}
}
}
}
if (field == null) {
throw new IOException("Expand field is null.");
}
String sortParam = params.get(ExpandParams.EXPAND_SORT);
String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
String qs = params.get(ExpandParams.EXPAND_Q);
int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
Sort sort = null;
if (sortParam != null) {
sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
}
Query query;
if (qs == null) {
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
}
}
List<Query> newFilters = new ArrayList<>();
if (fqs == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
newFilters.add(q);
}
}
}
} else {
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
SolrIndexSearcher searcher = req.getSearcher();
LeafReader reader = searcher.getSlowAtomicReader();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
SortedDocValues values = null;
long nullValue = 0L;
if (fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
} else {
//Get the nullValue for the numeric collapse field
String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
final NumberType numType = fieldType.getNumberType();
// we don't need to handle invalid 64-bit field types here.
if (defaultValue != null) {
if (numType == NumberType.INTEGER) {
nullValue = Long.parseLong(defaultValue);
} else if (numType == NumberType.FLOAT) {
nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
}
} else if (NumberType.FLOAT.equals(numType)) {
// Integer case already handled by nullValue defaulting to 0
nullValue = Float.floatToIntBits(0.0f);
}
}
FixedBitSet groupBits = null;
LongHashSet groupSet = null;
DocList docList = rb.getResults().docList;
IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
//Gather the groups for the current page of documents
DocIterator idit = docList.iterator();
int[] globalDocs = new int[docList.size()];
int docsIndex = -1;
while (idit.hasNext()) {
globalDocs[++docsIndex] = idit.nextDoc();
}
Arrays.sort(globalDocs);
Query groupQuery = null;
/*
* This code gathers the group information for the current page.
*/
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
if (contexts.size() == 0) {
//When no context is available we can skip the expanding
return;
}
int currentContext = 0;
int currentDocBase = contexts.get(currentContext).docBase;
int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
IntObjectHashMap<BytesRef> ordBytes = null;
if (values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
if (values instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
int count = 0;
ordBytes = new IntObjectHashMap<>();
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
if (ordinalMap != null) {
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
}
int contextDoc = globalDoc - currentDocBase;
if (ordinalMap != null) {
if (contextDoc > currentValues.docID()) {
currentValues.advance(contextDoc);
}
if (contextDoc == currentValues.docID()) {
int ord = currentValues.ordValue();
++count;
BytesRef ref = currentValues.lookupOrd(ord);
ord = (int) segmentOrdinalMap.get(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
} else {
if (globalDoc > values.docID()) {
values.advance(globalDoc);
}
if (globalDoc == values.docID()) {
int ord = values.ordValue();
++count;
BytesRef ref = values.lookupOrd(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
}
}
if (count > 0 && count < 200) {
try {
groupQuery = getGroupQuery(field, count, ordBytes);
} catch (Exception e) {
throw new IOException(e);
}
}
} else {
groupSet = new LongHashSet(docList.size());
NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
int count = 0;
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
}
int contextDoc = globalDoc - currentDocBase;
int valueDocID = collapseValues.docID();
if (valueDocID < contextDoc) {
valueDocID = collapseValues.advance(contextDoc);
}
long value;
if (valueDocID == contextDoc) {
value = collapseValues.longValue();
} else {
value = 0;
}
if (value != nullValue) {
++count;
groupSet.add(value);
collapsedSet.add(globalDoc);
}
}
if (count > 0 && count < 200) {
if (fieldType.isPointField()) {
groupQuery = getPointGroupQuery(schemaField, count, groupSet);
} else {
groupQuery = getGroupQuery(field, fieldType, count, groupSet);
}
}
}
Collector collector;
if (sort != null)
sort = sort.rewrite(searcher);
Collector groupExpandCollector = null;
if (values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
} else {
groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
}
if (groupQuery != null) {
//Limits the results to documents that are in the same group as the documents in the page.
newFilters.add(groupQuery);
}
SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
if (pfilter.postFilter != null) {
pfilter.postFilter.setLastDelegate(groupExpandCollector);
collector = pfilter.postFilter;
} else {
collector = groupExpandCollector;
}
if (pfilter.filter == null) {
searcher.search(query, collector);
} else {
Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
searcher.search(q, collector);
}
LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
NamedList outMap = new SimpleOrderedMap();
CharsRefBuilder charsRef = new CharsRefBuilder();
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
if (fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int) groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
}
rb.rsp.add("expanded", outMap);
}
Aggregations