use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class JoinUtil method createJoinQuery.
/**
 * Builds a query time join that works on the global ordinals of a dedicated join field.
 *
 * <p>The join comes with the following restrictions and requirements:
 * <ol>
 *   <li>A document can refer to only one other document (it may itself be referred to by one or more
 *       documents).</li>
 *   <li>Documents on each side of the join must be distinguishable. Typically an extra field identifies the
 *       "from" and the "to" side, and the fromQuery and toQuery must take this into account.</li>
 *   <li>Both the "from" and the "to" documents must use one single sorted doc values join field, and that
 *       field should store the join values as UTF-8 strings.</li>
 *   <li>An ordinal map created on top of the join field must be provided.</li>
 * </ol>
 *
 * <p>Note: min and max filtering as well as the avg score mode require the join to track how often a document
 * matches per join value, which increases the per join cost in both execution time and memory.
 *
 * <p>The from query is executed by this method, before the join query is returned. When the index has no
 * segments, or when its only segment has no sorted doc values for the join field, a {@link MatchNoDocsQuery}
 * is returned instead of a join query.
 *
 * @param joinField  The {@link SortedDocValues} field containing the join values
 * @param fromQuery  The query containing the actual user query. The fromQuery may only match "from" documents.
 * @param toQuery    The query identifying all documents on the "to" side.
 * @param searcher   The index searcher used to execute the from query
 * @param scoreMode  Instructs how scores from the fromQuery are mapped to the returned query
 * @param ordinalMap The ordinal map constructed over the joinField. For a single segment index no ordinal map
 *                   needs to be provided; a provided one is ignored in that case.
 * @param min        Optionally the minimum number of "from" documents that are required to match for a "to"
 *                   document to be a match. The min is inclusive. Setting min to 0 and max to
 *                   <code>Integer.MAX_VALUE</code> disables the min and max "from" documents filtering
 * @param max        Optionally the maximum number of "from" documents that are allowed to match for a "to"
 *                   document to be a match. The max is inclusive. Setting min to 0 and max to
 *                   <code>Integer.MAX_VALUE</code> disables the min and max "from" documents filtering
 * @return a {@link Query} instance that can be used to join documents based on the join field
 * @throws IOException If I/O related errors occur
 * @throws IllegalArgumentException If the index has more than one segment and no ordinal map was provided,
 *                                  or if the score mode isn't supported
 */
public static Query createJoinQuery(String joinField, Query fromQuery, Query toQuery, IndexSearcher searcher, ScoreMode scoreMode, MultiDocValues.OrdinalMap ordinalMap, int min, int max) throws IOException {
  final int leafCount = searcher.getIndexReader().leaves().size();
  if (leafCount == 0) {
    return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
  }

  final long valueCount;
  if (leafCount > 1) {
    if (ordinalMap == null) {
      throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
    }
    valueCount = ordinalMap.getValueCount();
  } else {
    // Exactly one segment: the ordinal map is not needed, so drop whatever the caller passed in.
    ordinalMap = null;
    final LeafReader onlyLeaf = searcher.getIndexReader().leaves().get(0).reader();
    final SortedDocValues joinValues = onlyLeaf.getSortedDocValues(joinField);
    if (joinValues == null) {
      return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
    }
    valueCount = joinValues.getValueCount();
  }

  final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
  final Query rewrittenToQuery = searcher.rewrite(toQuery);

  GlobalOrdinalsWithScoreCollector scoringCollector;
  switch (scoreMode) {
    case Total:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
      break;
    case Min:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
      break;
    case Max:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
      break;
    case Avg:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
      break;
    case None:
      if (min > 0 || max != Integer.MAX_VALUE) {
        // min/max filtering is active, so the counting collector is needed even though no scores are kept.
        scoringCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
        break;
      }
      // Neither scores nor match counts are needed: collect the matching ordinals only.
      GlobalOrdinalsCollector ordinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
      searcher.search(rewrittenFromQuery, ordinalsCollector);
      return new GlobalOrdinalsQuery(ordinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, searcher.getTopReaderContext().id());
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
  }

  searcher.search(rewrittenFromQuery, scoringCollector);
  return new GlobalOrdinalsWithScoreQuery(scoringCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestBlockJoinSelector method testSortedSelector.
/**
 * Checks that {@code BlockJoinSelector.wrap} over sorted doc values exposes, per parent document, the
 * minimum (Type.MIN) or maximum (Type.MAX) ordinal for that parent.
 */
public void testSortedSelector() throws IOException {
  final int docCount = 20;

  final BitSet parents = new FixedBitSet(docCount);
  for (int parentDoc : new int[] {0, 5, 6, 10, 15, 19}) {
    parents.set(parentDoc);
  }

  final BitSet children = new FixedBitSet(docCount);
  for (int childDoc : new int[] {2, 3, 4, 12, 17}) {
    children.set(childDoc);
  }

  // -1 is the fill value for every doc that does not get an explicit ordinal below.
  final int[] ords = new int[docCount];
  Arrays.fill(ords, -1);
  ords[2] = 5;
  ords[3] = 7;
  ords[4] = 3;
  ords[12] = 10;
  // NOTE(review): the child bit above is set for doc 17 while this ordinal is stored at doc 18;
  // parent 19 is still expected to report ordinal 10 below - confirm this is intended.
  ords[18] = 10;

  final SortedDocValues minOrds = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MIN, parents, children);
  // Parent 5 is expected to report ordinal 3, the smallest of 5, 7 and 3.
  assertEquals(5, minOrds.nextDoc());
  assertEquals(3, minOrds.ordValue());
  assertEquals(15, minOrds.nextDoc());
  assertEquals(10, minOrds.ordValue());
  assertEquals(19, minOrds.nextDoc());
  assertEquals(10, minOrds.ordValue());
  assertEquals(NO_MORE_DOCS, minOrds.nextDoc());

  final SortedDocValues maxOrds = BlockJoinSelector.wrap(DocValues.singleton(new CannedSortedDocValues(ords)), BlockJoinSelector.Type.MAX, parents, children);
  // Parent 5 is expected to report ordinal 7, the largest of 5, 7 and 3.
  assertEquals(5, maxOrds.nextDoc());
  assertEquals(7, maxOrds.ordValue());
  assertEquals(15, maxOrds.nextDoc());
  assertEquals(10, maxOrds.ordValue());
  assertEquals(19, maxOrds.nextDoc());
  assertEquals(10, maxOrds.ordValue());
  assertEquals(NO_MORE_DOCS, maxOrds.nextDoc());
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class DocValuesTest method testDocValues.
/**
 * Indexes a single document and verifies the doc values of the *dv fields three ways: the doc values type
 * recorded in the field infos, the raw per-document values read from the reader, and the values exposed
 * through each field type's value source. Finishes by checking that created fields convert back to the
 * original objects.
 */
@Test
public void testDocValues() throws IOException {
  assertU(adoc("id", "1"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherHolder = core.openNewSearcher(true, true);
    final SolrIndexSearcher indexSearcher = searcherHolder.get();
    try {
      final LeafReader slowReader = indexSearcher.getSlowAtomicReader();
      assertEquals(1, slowReader.numDocs());

      // 1) Doc values types as recorded in the field infos.
      final FieldInfos fieldInfos = slowReader.getFieldInfos();
      for (String numericField : new String[] {"floatdv", "intdv", "doubledv", "longdv"}) {
        assertEquals(DocValuesType.NUMERIC, fieldInfos.fieldInfo(numericField).getDocValuesType());
      }
      for (String sortedField : new String[] {"stringdv", "booldv"}) {
        assertEquals(DocValuesType.SORTED, fieldInfos.fieldInfo(sortedField).getDocValuesType());
      }

      // 2) Raw values of the only document (doc id 0).
      final NumericDocValues floatRaw = slowReader.getNumericDocValues("floatdv");
      assertEquals(0, floatRaw.nextDoc());
      assertEquals((long) Float.floatToIntBits(1), floatRaw.longValue());

      final NumericDocValues intRaw = slowReader.getNumericDocValues("intdv");
      assertEquals(0, intRaw.nextDoc());
      assertEquals(2L, intRaw.longValue());

      final NumericDocValues doubleRaw = slowReader.getNumericDocValues("doubledv");
      assertEquals(0, doubleRaw.nextDoc());
      assertEquals(Double.doubleToLongBits(3), doubleRaw.longValue());

      final NumericDocValues longRaw = slowReader.getNumericDocValues("longdv");
      assertEquals(0, longRaw.nextDoc());
      assertEquals(4L, longRaw.longValue());

      final SortedDocValues stringRaw = slowReader.getSortedDocValues("stringdv");
      assertEquals(0, stringRaw.nextDoc());
      assertEquals("solr", stringRaw.binaryValue().utf8ToString());

      final SortedDocValues boolRaw = slowReader.getSortedDocValues("booldv");
      assertEquals(0, boolRaw.nextDoc());
      assertEquals("T", boolRaw.binaryValue().utf8ToString());

      // 3) Values as exposed through the value source of each schema field.
      final IndexSchema latestSchema = core.getLatestSchema();
      final SchemaField floatDv = latestSchema.getField("floatdv");
      final SchemaField intDv = latestSchema.getField("intdv");
      final SchemaField doubleDv = latestSchema.getField("doubledv");
      final SchemaField longDv = latestSchema.getField("longdv");
      final SchemaField boolDv = latestSchema.getField("booldv");

      final FunctionValues floatFn = floatDv.getType().getValueSource(floatDv, null).getValues(null, indexSearcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(1f, floatFn.floatVal(0), 0f);
      assertEquals(1f, floatFn.objectVal(0));

      final FunctionValues intFn = intDv.getType().getValueSource(intDv, null).getValues(null, indexSearcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(2, intFn.intVal(0));
      assertEquals(2, intFn.objectVal(0));

      final FunctionValues doubleFn = doubleDv.getType().getValueSource(doubleDv, null).getValues(null, indexSearcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(3d, doubleFn.doubleVal(0), 0d);
      assertEquals(3d, doubleFn.objectVal(0));

      final FunctionValues longFn = longDv.getType().getValueSource(longDv, null).getValues(null, indexSearcher.getSlowAtomicReader().leaves().get(0));
      assertEquals(4L, longFn.longVal(0));
      assertEquals(4L, longFn.objectVal(0));

      final FunctionValues boolFn = boolDv.getType().getValueSource(boolDv, null).getValues(null, indexSearcher.getSlowAtomicReader().leaves().get(0));
      assertEquals("true", boolFn.strVal(0));
      assertEquals(true, boolFn.objectVal(0));

      // 4) Reversibility of created fields, for each "dv" field and its "dvs" counterpart.
      tstToObj(latestSchema.getField("floatdv"), -1.5f);
      tstToObj(latestSchema.getField("floatdvs"), -1.5f);
      tstToObj(latestSchema.getField("doubledv"), -1.5d);
      tstToObj(latestSchema.getField("doubledvs"), -1.5d);
      tstToObj(latestSchema.getField("intdv"), -7);
      tstToObj(latestSchema.getField("intdvs"), -7);
      tstToObj(latestSchema.getField("longdv"), -11L);
      tstToObj(latestSchema.getField("longdvs"), -11L);
      tstToObj(latestSchema.getField("datedv"), new Date(1000));
      tstToObj(latestSchema.getField("datedvs"), new Date(1000));
      tstToObj(latestSchema.getField("stringdv"), "foo");
      tstToObj(latestSchema.getField("stringdvs"), "foo");
      tstToObj(latestSchema.getField("booldv"), true);
      tstToObj(latestSchema.getField("booldvs"), true);
    } finally {
      // Release the reference obtained from openNewSearcher, even when an assertion failed.
      searcherHolder.decref();
    }
  }
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class Lucene70DocValuesConsumer method doAddSortedField.
/**
 * Writes one sorted doc values field: first the description of which documents have a value, then the
 * per-document ordinals, and finally the terms dictionary.
 *
 * <p>The values are iterated several times, so a fresh iterator is requested from the producer before
 * each pass.
 *
 * @param field          the field being written
 * @param valuesProducer source of the sorted doc values of {@code field}
 * @throws IOException if writing to the meta or data output fails
 */
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  SortedDocValues sorted = valuesProducer.getSorted(field);

  // Pass 1: count the documents that have a value.
  int docsWithValue = 0;
  while (sorted.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    docsWithValue++;
  }

  if (docsWithValue != 0 && docsWithValue != maxDoc) {
    // Only some documents have a value: store the doc id set in the data file and record its
    // offset and length in the meta file.
    final long bitSetStart = data.getFilePointer();
    meta.writeLong(bitSetStart);
    sorted = valuesProducer.getSorted(field);
    IndexedDISI.writeBitSet(sorted, data);
    meta.writeLong(data.getFilePointer() - bitSetStart);
  } else {
    // No document (-2) or every document (-1) has a value: a marker is written in place of the
    // offset, followed by a zero length.
    final long marker = docsWithValue == 0 ? -2 : -1;
    meta.writeLong(marker);
    meta.writeLong(0L);
  }

  meta.writeInt(docsWithValue);

  if (sorted.getValueCount() > 1) {
    final int bitsPerOrd = DirectWriter.unsignedBitsRequired(sorted.getValueCount() - 1);
    meta.writeByte((byte) bitsPerOrd);
    final long ordsStart = data.getFilePointer();
    meta.writeLong(ordsStart);
    final DirectWriter ordWriter = DirectWriter.getInstance(data, docsWithValue, bitsPerOrd);
    // Pass over the values again to write one ordinal per document that has a value.
    sorted = valuesProducer.getSorted(field);
    while (sorted.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      ordWriter.add(sorted.ordValue());
    }
    ordWriter.finish();
    meta.writeLong(data.getFilePointer() - ordsStart);
  } else {
    // At most one distinct value: no ordinals are written; zero bits per ordinal, zero offset
    // and zero length are recorded instead.
    meta.writeByte((byte) 0);
    meta.writeLong(0L);
    meta.writeLong(0L);
  }

  addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
the class TestFieldCache method testDocValuesIntegration.
/**
 * Indexes one document carrying a binary, a sorted, a numeric and a sorted-set doc values field, then
 * checks for each field which {@code FieldCache} accessors return its value and which ones are
 * rejected with an {@link IllegalStateException}.
 */
public void testDocValuesIntegration() throws Exception {
  final Directory directory = newDirectory();
  final IndexWriterConfig config = newIndexWriterConfig(null);
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final Document document = new Document();
  document.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
  document.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
  document.add(new NumericDocValuesField("numeric", 42));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
  writer.addDocument(document);

  final DirectoryReader topReader = writer.getReader();
  writer.close();
  final LeafReader leaf = getOnlyLeafReader(topReader);

  // ---- Binary type: can be retrieved via getTerms() ----
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getNumerics(leaf, "binary", FieldCache.INT_POINT_PARSER);
  });

  final BinaryDocValues binaryTerms = FieldCache.DEFAULT.getTerms(leaf, "binary");
  assertEquals(0, binaryTerms.nextDoc());
  final BytesRef binaryTerm = binaryTerms.binaryValue();
  assertEquals("binary value", binaryTerm.utf8ToString());

  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getTermsIndex(leaf, "binary");
  });
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getDocTermOrds(leaf, "binary", null);
  });
  expectThrows(IllegalStateException.class, () -> {
    new DocTermOrds(leaf, null, "binary");
  });

  final Bits binaryDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "binary", null);
  assertTrue(binaryDocsWithField.get(0));

  // ---- Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds() ----
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getNumerics(leaf, "sorted", FieldCache.INT_POINT_PARSER);
  });
  expectThrows(IllegalStateException.class, () -> {
    new DocTermOrds(leaf, null, "sorted");
  });

  final BinaryDocValues sortedAsTerms = FieldCache.DEFAULT.getTerms(leaf, "sorted");
  assertEquals(0, sortedAsTerms.nextDoc());
  final BytesRef sortedTerm = sortedAsTerms.binaryValue();
  assertEquals("sorted value", sortedTerm.utf8ToString());

  final SortedDocValues sortedIndex = FieldCache.DEFAULT.getTermsIndex(leaf, "sorted");
  assertEquals(0, sortedIndex.nextDoc());
  assertEquals(0, sortedIndex.ordValue());
  assertEquals(1, sortedIndex.getValueCount());
  final BytesRef sortedIndexTerm = sortedIndex.binaryValue();
  assertEquals("sorted value", sortedIndexTerm.utf8ToString());

  final SortedSetDocValues sortedAsOrds = FieldCache.DEFAULT.getDocTermOrds(leaf, "sorted", null);
  assertEquals(0, sortedAsOrds.nextDoc());
  assertEquals(0, sortedAsOrds.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedAsOrds.nextOrd());
  assertEquals(1, sortedAsOrds.getValueCount());

  final Bits sortedDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "sorted", null);
  assertTrue(sortedDocsWithField.get(0));

  // ---- Numeric type: can be retrieved via getInts() and so on ----
  final NumericDocValues numericValues = FieldCache.DEFAULT.getNumerics(leaf, "numeric", FieldCache.INT_POINT_PARSER);
  assertEquals(0, numericValues.nextDoc());
  assertEquals(42, numericValues.longValue());

  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getTerms(leaf, "numeric");
  });
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getTermsIndex(leaf, "numeric");
  });
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getDocTermOrds(leaf, "numeric", null);
  });
  expectThrows(IllegalStateException.class, () -> {
    new DocTermOrds(leaf, null, "numeric");
  });

  final Bits numericDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "numeric", null);
  assertTrue(numericDocsWithField.get(0));

  // ---- SortedSet type: can be retrieved via getDocTermOrds() ----
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getNumerics(leaf, "sortedset", FieldCache.INT_POINT_PARSER);
  });
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getTerms(leaf, "sortedset");
  });
  expectThrows(IllegalStateException.class, () -> {
    FieldCache.DEFAULT.getTermsIndex(leaf, "sortedset");
  });
  expectThrows(IllegalStateException.class, () -> {
    new DocTermOrds(leaf, null, "sortedset");
  });

  // Two values were indexed for this field, so two ordinals are expected before NO_MORE_ORDS.
  final SortedSetDocValues sortedSetOrds = FieldCache.DEFAULT.getDocTermOrds(leaf, "sortedset", null);
  assertEquals(0, sortedSetOrds.nextDoc());
  assertEquals(0, sortedSetOrds.nextOrd());
  assertEquals(1, sortedSetOrds.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetOrds.nextOrd());
  assertEquals(2, sortedSetOrds.getValueCount());

  final Bits sortedSetDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "sortedset", null);
  assertTrue(sortedSetDocsWithField.get(0));

  topReader.close();
  directory.close();
}
Aggregations