Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
The class TestMemoryIndex, method testOmitNorms.
@Test
public void testOmitNorms() throws IOException {
MemoryIndex mi = new MemoryIndex();
FieldType ft = new FieldType();
ft.setTokenized(true);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
ft.setOmitNorms(true);
mi.addField(new Field("f1", "some text in here", ft), analyzer);
mi.freeze();
LeafReader leafReader = (LeafReader) mi.createSearcher().getIndexReader();
NumericDocValues norms = leafReader.getNormValues("f1");
assertNull(norms);
}
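For contrast, a minimal hedged sketch of the complementary case (this method is not part of the original test; it reuses the class's analyzer fixture): when norms are kept, the same lookup should return a non-null iterator.
@Test
public void testNormsPresentWhenNotOmitted() throws IOException {
  MemoryIndex mi = new MemoryIndex();
  FieldType ft = new FieldType();
  ft.setTokenized(true);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setOmitNorms(false); // keep norms this time
  mi.addField(new Field("f1", "some text in here", ft), analyzer);
  mi.freeze();
  LeafReader leafReader = (LeafReader) mi.createSearcher().getIndexReader();
  // hypothetical companion assertion: a norms iterator exists when norms are not omitted
  assertNotNull(leafReader.getNormValues("f1"));
}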
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
The class TestMemoryIndex, method testPointValuesDoNotAffectPositionsOrOffset.
public void testPointValuesDoNotAffectPositionsOrOffset() throws Exception {
MemoryIndex mi = new MemoryIndex(true, true);
mi.addField(new TextField("text", "quick brown fox", Field.Store.NO), analyzer);
mi.addField(new BinaryPoint("text", "quick".getBytes(StandardCharsets.UTF_8)), analyzer);
mi.addField(new BinaryPoint("text", "brown".getBytes(StandardCharsets.UTF_8)), analyzer);
LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
TermsEnum tenum = leafReader.terms("text").iterator();
assertEquals("brown", tenum.next().utf8ToString());
PostingsEnum penum = tenum.postings(null, PostingsEnum.OFFSETS);
assertEquals(0, penum.nextDoc());
assertEquals(1, penum.freq());
assertEquals(1, penum.nextPosition());
assertEquals(6, penum.startOffset());
assertEquals(11, penum.endOffset());
assertEquals("fox", tenum.next().utf8ToString());
penum = tenum.postings(penum, PostingsEnum.OFFSETS);
assertEquals(0, penum.nextDoc());
assertEquals(1, penum.freq());
assertEquals(2, penum.nextPosition());
assertEquals(12, penum.startOffset());
assertEquals(15, penum.endOffset());
assertEquals("quick", tenum.next().utf8ToString());
penum = tenum.postings(penum, PostingsEnum.OFFSETS);
assertEquals(0, penum.nextDoc());
assertEquals(1, penum.freq());
assertEquals(0, penum.nextPosition());
assertEquals(0, penum.startOffset());
assertEquals(5, penum.endOffset());
IndexSearcher indexSearcher = mi.createSearcher();
assertEquals(1, indexSearcher.count(BinaryPoint.newExactQuery("text", "quick".getBytes(StandardCharsets.UTF_8))));
assertEquals(1, indexSearcher.count(BinaryPoint.newExactQuery("text", "brown".getBytes(StandardCharsets.UTF_8))));
assertEquals(0, indexSearcher.count(BinaryPoint.newExactQuery("text", "jumps".getBytes(StandardCharsets.UTF_8))));
}
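Because TextField indexes positions, the BinaryPoint values on the same field name do not interfere with positional queries either. A short hedged follow-up sketch (not from the original test; assumes a PhraseQuery import and reuses mi from above):
// The three tokens still form a matching phrase despite the point fields.
PhraseQuery phrase = new PhraseQuery("text", "quick", "brown", "fox");
assertEquals(1, mi.createSearcher().count(phrase));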
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
The class CompletionQuery, method rewrite.
@Override
public Query rewrite(IndexReader reader) throws IOException {
byte type = 0;
boolean first = true;
Terms terms;
for (LeafReaderContext context : reader.leaves()) {
LeafReader leafReader = context.reader();
try {
if ((terms = leafReader.terms(getField())) == null) {
continue;
}
} catch (IOException e) {
continue;
}
if (terms instanceof CompletionTerms) {
CompletionTerms completionTerms = (CompletionTerms) terms;
byte t = completionTerms.getType();
if (first) {
type = t;
first = false;
} else if (type != t) {
throw new IllegalStateException(getField() + " has values of multiple types");
}
}
}
if (first == false) {
if (this instanceof ContextQuery) {
if (type == SuggestField.TYPE) {
throw new IllegalStateException(this.getClass().getSimpleName() + " can not be executed against a non context-enabled SuggestField: " + getField());
}
} else {
if (type == ContextSuggestField.TYPE) {
return new ContextQuery(this);
}
}
}
return super.rewrite(reader);
}
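To see where this rewrite fires, a hedged usage sketch (the "suggest" field name, the open indexReader, and the analyzer are illustrative assumptions; imports from org.apache.lucene.search.suggest.document are assumed): completion queries are rewritten when searched through a SuggestIndexSearcher.
Analyzer analyzer = new StandardAnalyzer();
SuggestIndexSearcher suggestSearcher = new SuggestIndexSearcher(indexReader); // indexReader: an assumed open IndexReader
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest", "luc"));
// If "suggest" was indexed with ContextSuggestField, the rewrite above wraps this
// query in a ContextQuery; if segments mix field types, it throws IllegalStateException.
TopSuggestDocs suggestions = suggestSearcher.suggest(query, 5, false);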
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
The class ExpandComponent, method process.
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doExpand) {
return;
}
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String field = params.get(ExpandParams.EXPAND_FIELD);
String hint = null;
if (field == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
field = cp.getField();
hint = cp.hint;
}
}
}
}
if (field == null) {
throw new IOException("Expand field is null.");
}
String sortParam = params.get(ExpandParams.EXPAND_SORT);
String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
String qs = params.get(ExpandParams.EXPAND_Q);
int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
Sort sort = null;
if (sortParam != null) {
sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
}
Query query;
if (qs == null) {
query = rb.getQuery();
} else {
try {
QParser parser = QParser.getParser(qs, req);
query = parser.getQuery();
} catch (Exception e) {
throw new IOException(e);
}
}
List<Query> newFilters = new ArrayList<>();
if (fqs == null) {
List<Query> filters = rb.getFilters();
if (filters != null) {
for (Query q : filters) {
if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
newFilters.add(q);
}
}
}
} else {
try {
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
QParser fqp = QParser.getParser(fq, req);
newFilters.add(fqp.getQuery());
}
}
} catch (Exception e) {
throw new IOException(e);
}
}
SolrIndexSearcher searcher = req.getSearcher();
LeafReader reader = searcher.getSlowAtomicReader();
SchemaField schemaField = searcher.getSchema().getField(field);
FieldType fieldType = schemaField.getType();
SortedDocValues values = null;
long nullValue = 0L;
if (fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap<>();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
} else {
//Get the nullValue for the numeric collapse field
String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
final NumberType numType = fieldType.getNumberType();
// Since the expand component depends on the operation of the collapse component,
// which validates that numeric collapse fields are 32-bit (int/float), we don't
// need to handle 64-bit (long/double) field types here.
if (defaultValue != null) {
if (numType == NumberType.INTEGER) {
nullValue = Long.parseLong(defaultValue);
} else if (numType == NumberType.FLOAT) {
nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
}
} else if (NumberType.FLOAT.equals(numType)) {
// Integer case already handled by nullValue defaulting to 0
nullValue = Float.floatToIntBits(0.0f);
}
}
FixedBitSet groupBits = null;
LongHashSet groupSet = null;
DocList docList = rb.getResults().docList;
IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
//Gather the groups for the current page of documents
DocIterator idit = docList.iterator();
int[] globalDocs = new int[docList.size()];
int docsIndex = -1;
while (idit.hasNext()) {
globalDocs[++docsIndex] = idit.nextDoc();
}
Arrays.sort(globalDocs);
Query groupQuery = null;
/*
* This code gathers the group information for the current page.
*/
List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
if (contexts.size() == 0) {
//When no context is available we can skip the expanding
return;
}
int currentContext = 0;
int currentDocBase = contexts.get(currentContext).docBase;
int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
IntObjectHashMap<BytesRef> ordBytes = null;
if (values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
if (values instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
int count = 0;
ordBytes = new IntObjectHashMap<>();
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
if (ordinalMap != null) {
currentValues = sortedDocValues[currentContext];
segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
}
}
int contextDoc = globalDoc - currentDocBase;
if (ordinalMap != null) {
if (contextDoc > currentValues.docID()) {
currentValues.advance(contextDoc);
}
if (contextDoc == currentValues.docID()) {
int ord = currentValues.ordValue();
++count;
BytesRef ref = currentValues.lookupOrd(ord);
ord = (int) segmentOrdinalMap.get(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
} else {
if (globalDoc > values.docID()) {
values.advance(globalDoc);
}
if (globalDoc == values.docID()) {
int ord = values.ordValue();
++count;
BytesRef ref = values.lookupOrd(ord);
ordBytes.put(ord, BytesRef.deepCopyOf(ref));
groupBits.set(ord);
collapsedSet.add(globalDoc);
}
}
}
if (count > 0 && count < 200) {
try {
groupQuery = getGroupQuery(field, count, ordBytes);
} catch (Exception e) {
throw new IOException(e);
}
}
} else {
groupSet = new LongHashSet(docList.size());
NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
int count = 0;
for (int i = 0; i < globalDocs.length; i++) {
int globalDoc = globalDocs[i];
while (globalDoc >= nextDocBase) {
currentContext++;
currentDocBase = contexts.get(currentContext).docBase;
nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
}
int contextDoc = globalDoc - currentDocBase;
int valueDocID = collapseValues.docID();
if (valueDocID < contextDoc) {
valueDocID = collapseValues.advance(contextDoc);
}
long value;
if (valueDocID == contextDoc) {
value = collapseValues.longValue();
} else {
value = 0;
}
if (value != nullValue) {
++count;
groupSet.add(value);
collapsedSet.add(globalDoc);
}
}
if (count > 0 && count < 200) {
if (fieldType.isPointField()) {
groupQuery = getPointGroupQuery(schemaField, count, groupSet);
} else {
groupQuery = getGroupQuery(field, fieldType, count, groupSet);
}
}
}
Collector collector;
if (sort != null) sort = sort.rewrite(searcher);
Collector groupExpandCollector = null;
if (values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap<>();
mapping.put(field, UninvertingReader.Type.SORTED);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
}
groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
} else {
groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
}
if (groupQuery != null) {
//Limits the results to documents that are in the same group as the documents in the page.
newFilters.add(groupQuery);
}
SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
if (pfilter.postFilter != null) {
pfilter.postFilter.setLastDelegate(groupExpandCollector);
collector = pfilter.postFilter;
} else {
collector = groupExpandCollector;
}
if (pfilter.filter == null) {
searcher.search(query, collector);
} else {
Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
searcher.search(q, collector);
}
LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
NamedList outMap = new SimpleOrderedMap();
CharsRefBuilder charsRef = new CharsRefBuilder();
for (LongObjectCursor<Collector> cursor : groups) {
long groupValue = cursor.key;
TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
TopDocs topDocs = topDocsCollector.topDocs();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
if (scoreDocs.length > 0) {
int[] docs = new int[scoreDocs.length];
float[] scores = new float[scoreDocs.length];
for (int i = 0; i < docs.length; i++) {
ScoreDoc scoreDoc = scoreDocs[i];
docs[i] = scoreDoc.doc;
scores[i] = scoreDoc.score;
}
DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
if (fieldType instanceof StrField) {
final BytesRef bytesRef = ordBytes.get((int) groupValue);
fieldType.indexedToReadable(bytesRef, charsRef);
String group = charsRef.toString();
outMap.add(group, slice);
} else {
outMap.add(numericToString(fieldType, groupValue), slice);
}
}
}
rb.rsp.add("expanded", outMap);
}
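For reference, a hedged SolrJ sketch of the request side that drives this process() method (the collection, collapse field, and solrClient are illustrative assumptions):
SolrQuery q = new SolrQuery("memory");
q.addFilterQuery("{!collapse field=group_s}"); // a CollapsingPostFilter supplies the expand field when expand.field is unset
q.setParam("expand", "true");                  // turns on rb.doExpand
q.setParam("expand.rows", "5");                // EXPAND_ROWS, same default as above
QueryResponse rsp = solrClient.query("techproducts", q); // solrClient: an assumed SolrClient
Map<String, SolrDocumentList> expanded = rsp.getExpandedResults(); // the "expanded" section added above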
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache.
The class JoinUtil, method createJoinQuery.
/**
* A query time join using global ordinals over a dedicated join field.
*
* This join has certain restrictions and requirements:
* 1) A document can only refer to one other document (but can be referred to by one or more documents).
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side, and then the fromQuery and toQuery must take this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* Note: min and max filtering and the avg score mode will require this join to keep track of the number of times
* a document matches per join value. This will increase the per join cost in terms of execution time and memory.
*
* @param joinField The {@link SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
* @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
* needs to be provided.
* @param min Optionally the minimum number of "from" documents that are required to match for a "to" document
* to be a match. The min is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @param max Optionally the maximum number of "from" documents that are allowed to match for a "to" document
* to be a match. The max is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @return a {@link Query} instance that can be used to join documents based on the join field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String joinField, Query fromQuery, Query toQuery, IndexSearcher searcher, ScoreMode scoreMode, MultiDocValues.OrdinalMap ordinalMap, int min, int max) throws IOException {
int numSegments = searcher.getIndexReader().leaves().size();
final long valueCount;
if (numSegments == 0) {
return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
} else if (numSegments == 1) {
// No need to use the ordinal map, because there is just one segment.
ordinalMap = null;
LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader();
SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField);
if (joinSortedDocValues != null) {
valueCount = joinSortedDocValues.getValueCount();
} else {
return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
}
} else {
if (ordinalMap == null) {
throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
}
valueCount = ordinalMap.getValueCount();
}
final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
final Query rewrittenToQuery = searcher.rewrite(toQuery);
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
switch(scoreMode) {
case Total:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
break;
case Min:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
break;
case Max:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
break;
case Avg:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
break;
case None:
if (min <= 0 && max == Integer.MAX_VALUE) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(rewrittenFromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, searcher.getTopReaderContext().id());
} else {
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
break;
}
default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
}
searcher.search(rewrittenFromQuery, globalOrdinalsWithScoreCollector);
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}
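A hedged end-to-end sketch (the directory, the "join_field" and "type" field names, and the discriminator queries are illustration-only assumptions): building the OrdinalMap the javadoc requires, then passing min=0 and max=Integer.MAX_VALUE to disable the match-count filtering, in a pattern similar to the project's own tests.
IndexReader reader = DirectoryReader.open(dir); // dir: an assumed Directory
IndexSearcher searcher = new IndexSearcher(reader);
// Requirement 4: an ordinal map built on top of the sorted-doc-values join field.
SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
for (LeafReaderContext context : reader.leaves()) {
  values[context.ord] = DocValues.getSorted(context.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
Query joinQuery = JoinUtil.createJoinQuery(
    "join_field",
    new TermQuery(new Term("type", "from")), // fromQuery matches only "from" docs (requirement 2)
    new TermQuery(new Term("type", "to")),   // toQuery matches only "to" docs
    searcher,
    ScoreMode.Max,
    ordinalMap,
    0, Integer.MAX_VALUE);                   // disables the min/max "from" count filtering
TopDocs hits = searcher.search(joinQuery, 10);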