Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.
The class FastTaxonomyFacetCounts, method countAll.
private final void countAll(IndexReader reader) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
    if (dv == null) {
      // this reader does not have DocValues for the requested category list
      continue;
    }
    Bits liveDocs = context.reader().getLiveDocs();
    for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
      if (liveDocs != null && liveDocs.get(doc) == false) {
        continue;
      }
      final BytesRef bytesRef = dv.binaryValue();
      byte[] bytes = bytesRef.bytes;
      int end = bytesRef.offset + bytesRef.length;
      int ord = 0;
      int offset = bytesRef.offset;
      int prev = 0;
      // decode the delta-encoded VInt ordinals packed into the binary payload
      while (offset < end) {
        byte b = bytes[offset++];
        if (b >= 0) {
          // high bit clear: last byte of this VInt; add the decoded delta to the previous ordinal
          prev = ord = ((ord << 7) | b) + prev;
          ++values[ord];
          ord = 0;
        } else {
          // high bit set: accumulate 7 more bits of the current VInt
          ord = (ord << 7) | (b & 0x7F);
        }
      }
    }
  }
  rollup();
}
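The inner while-loop decodes a compact layout: sorted ordinals are stored as deltas, each delta written as a VInt with its most-significant 7-bit group first and the high bit set on every byte but the last. A minimal sketch of a compatible encoder (a hypothetical helper, not the actual Lucene writer) makes the format easier to see:

import java.io.ByteArrayOutputStream;

// Hypothetical helper, not the Lucene encoder itself: emits sorted ordinals in
// the delta + VInt layout that countAll's inner loop decodes. Each delta is
// written most-significant 7-bit group first, with the high bit set on every
// byte except the last; deltas are assumed to fit in 28 bits for this sketch.
static byte[] encodeOrdinals(int[] sortedOrds) {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  int prev = 0;
  for (int ord : sortedOrds) {
    int delta = ord - prev; // requires ascending input
    prev = ord;
    if (delta >= (1 << 21)) out.write(((delta >>> 21) & 0x7F) | 0x80);
    if (delta >= (1 << 14)) out.write(((delta >>> 14) & 0x7F) | 0x80);
    if (delta >= (1 << 7)) out.write(((delta >>> 7) & 0x7F) | 0x80);
    out.write(delta & 0x7F); // final byte of the VInt: high bit clear
  }
  return out.toByteArray();
}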
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.
The class JoinDocFreqValueSource, method getValues.
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final BinaryDocValues terms = DocValues.getBinary(readerContext.reader(), field);
  final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
  Terms t = MultiFields.getTerms(top, qfield);
  final TermsEnum termsEnum = t == null ? TermsEnum.EMPTY : t.iterator();
  return new IntDocValues(this) {
    int lastDocID = -1;

    @Override
    public int intVal(int doc) throws IOException {
      if (doc < lastDocID) {
        throw new IllegalArgumentException("docs were sent out-of-order: lastDocID=" + lastDocID + " vs docID=" + doc);
      }
      lastDocID = doc;
      // BinaryDocValues is a forward-only iterator, so only advance when needed
      int curDocID = terms.docID();
      if (doc > curDocID) {
        curDocID = terms.advance(doc);
      }
      if (doc == curDocID) {
        // the doc has a value: look it up as a term in qfield and return its doc frequency
        BytesRef term = terms.binaryValue();
        if (termsEnum.seekExact(term)) {
          return termsEnum.docFreq();
        }
      }
      return 0;
    }
  };
}
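Because the anonymous IntDocValues enforces ascending doc order and the BinaryDocValues iterator is per-segment, callers must request a fresh FunctionValues per leaf and visit docs in increasing order. A hypothetical usage sketch under those constraints (the field names "id" and "link_id" are invented for illustration):

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;

// Hypothetical sketch: intVal(doc) returns the docFreq of this document's "id"
// value when looked up as a term in "link_id", i.e. an inbound-reference count.
static void printInboundCounts(IndexReader reader) throws IOException {
  ValueSource vs = new JoinDocFreqValueSource("id", "link_id");
  @SuppressWarnings("rawtypes")
  Map context = new HashMap();
  for (LeafReaderContext leaf : reader.leaves()) {
    FunctionValues values = vs.getValues(context, leaf); // fresh per segment
    for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) { // ascending, as intVal requires
      System.out.println((leaf.docBase + doc) + " -> " + values.intVal(doc));
    }
  }
}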
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.
The class TestOrdinalMappingLeafReader, method verifyResults.
private void verifyResults(Directory indexDir, Directory taxoDir) throws IOException {
  DirectoryReader indexReader = DirectoryReader.open(indexDir);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
  IndexSearcher searcher = newSearcher(indexReader);
  FacetsCollector collector = new FacetsCollector();
  FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, collector);
  // tag facets
  Facets tagFacets = new FastTaxonomyFacetCounts("$tags", taxoReader, facetConfig, collector);
  FacetResult result = tagFacets.getTopChildren(10, "tag");
  for (LabelAndValue lv : result.labelValues) {
    if (VERBOSE) {
      System.out.println(lv);
    }
    assertEquals(NUM_DOCS, lv.value.intValue());
  }
  // id facets
  Facets idFacets = new FastTaxonomyFacetCounts(taxoReader, facetConfig, collector);
  FacetResult idResult = idFacets.getTopChildren(10, "id");
  assertEquals(NUM_DOCS, idResult.childCount);
  // each "id" appears twice
  assertEquals(NUM_DOCS * 2, idResult.value);
  BinaryDocValues bdv = MultiDocValues.getBinaryValues(indexReader, "bdv");
  BinaryDocValues cbdv = MultiDocValues.getBinaryValues(indexReader, "cbdv");
  for (int i = 0; i < indexReader.maxDoc(); i++) {
    assertEquals(i, bdv.nextDoc());
    assertEquals(i, cbdv.nextDoc());
    assertEquals(Integer.parseInt(cbdv.binaryValue().utf8ToString()), Integer.parseInt(bdv.binaryValue().utf8ToString()) * 2);
  }
  IOUtils.close(indexReader, taxoReader);
}
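The final loop asserts that every document carries both binary fields and that "cbdv" always parses to twice "bdv". A hypothetical sketch of the indexing side implied by those assertions (the real test's setup may differ):

import java.io.IOException;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;

// Hypothetical sketch: doc i stores i in "bdv" and i * 2 in "cbdv", both as
// UTF-8 strings, so "cbdv" always parses to twice "bdv" after merging.
static void indexPairedValues(IndexWriter writer, int numDocs) throws IOException {
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("bdv", new BytesRef(Integer.toString(i))));
    doc.add(new BinaryDocValuesField("cbdv", new BytesRef(Integer.toString(i * 2))));
    writer.addDocument(doc);
  }
}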
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.
The class AnalyzingInfixSuggester, method createResults.
/**
 * Create the results based on the search hits.
 * Can be overridden by a subclass to add particular behavior (e.g. weight transformation).
 * Note that there is no prefix token (the {@code prefixToken} argument will
 * be null) whenever the final token in the incoming request was in fact finished
 * (had trailing characters, such as white-space).
 *
 * @throws IOException If there are problems reading fields from the underlying Lucene index.
 */
protected List<LookupResult> createResults(IndexSearcher searcher, TopFieldDocs hits, int num, CharSequence charSequence, boolean doHighlight, Set<String> matchedTokens, String prefixToken) throws IOException {
  List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  List<LookupResult> results = new ArrayList<>();
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    FieldDoc fd = (FieldDoc) hits.scoreDocs[i];
    BinaryDocValues textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
    textDV.advance(fd.doc);
    BytesRef term = textDV.binaryValue();
    String text = term.utf8ToString();
    long score = (Long) fd.fields[0];
    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields? they compress...
    BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
    BytesRef payload;
    if (payloadsDV != null) {
      if (payloadsDV.advance(fd.doc) == fd.doc) {
        payload = BytesRef.deepCopyOf(payloadsDV.binaryValue());
      } else {
        payload = new BytesRef(BytesRef.EMPTY_BYTES);
      }
    } else {
      payload = null;
    }
    // Must look up sorted-set by segment:
    int segment = ReaderUtil.subIndex(fd.doc, leaves);
    SortedSetDocValues contextsDV = leaves.get(segment).reader().getSortedSetDocValues(CONTEXTS_FIELD_NAME);
    Set<BytesRef> contexts;
    if (contextsDV != null) {
      contexts = new HashSet<BytesRef>();
      int targetDocID = fd.doc - leaves.get(segment).docBase;
      if (contextsDV.advance(targetDocID) == targetDocID) {
        long ord;
        while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(ord));
          contexts.add(context);
        }
      }
    } else {
      contexts = null;
    }
    LookupResult result;
    if (doHighlight) {
      result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload, contexts);
    } else {
      result = new LookupResult(text, score, payload, contexts);
    }
    results.add(result);
  }
  return results;
}
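For context, createResults is invoked from the suggester's lookup(), which runs the suggest search and materializes text, payload, and contexts per hit. A hedged caller sketch; the index path and query below are assumptions, and the suggester is assumed to have been built earlier via build(InputIterator):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.FSDirectory;

// Hypothetical caller: lookup() hands its top hits to createResults above.
try (AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
    FSDirectory.open(Paths.get("/tmp/suggest-index")), new StandardAnalyzer())) {
  for (LookupResult r : suggester.lookup("lucen", 5, true, true)) {
    // with doHighlight=true, r.key carries the markup produced by highlight()
    System.out.println(r.key + " weight=" + r.value);
  }
}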
Use of org.apache.lucene.index.BinaryDocValues in project lucene-solr by apache.
The class SolrDocumentFetcher, method decorateDocValueFields.
/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The Lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException {
  final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
  final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
  final int localId = docid - leafContexts.get(subIndex).docBase;
  final LeafReader leafReader = leafContexts.get(subIndex).reader();
  for (String fieldName : fields) {
    final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
    if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
      log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
      continue;
    }
    FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
    if (fi == null) {
      // Searcher doesn't have info about this field, hence ignore it.
      continue;
    }
    final DocValuesType dvType = fi.getDocValuesType();
    switch (dvType) {
      case NUMERIC:
        final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
        if (ndv == null) {
          continue;
        }
        Long val;
        if (ndv.advanceExact(localId)) {
          val = ndv.longValue();
        } else {
          continue;
        }
        Object newVal = val;
        if (schemaField.getType().isPointField()) {
          // TODO: Maybe merge PointField with TrieFields here
          NumberType type = schemaField.getType().getNumberType();
          switch (type) {
            case INTEGER:
              newVal = val.intValue();
              break;
            case LONG:
              newVal = val.longValue();
              break;
            case FLOAT:
              newVal = Float.intBitsToFloat(val.intValue());
              break;
            case DOUBLE:
              newVal = Double.longBitsToDouble(val);
              break;
            case DATE:
              newVal = new Date(val);
              break;
            default:
              throw new AssertionError("Unexpected PointType: " + type);
          }
        } else {
          if (schemaField.getType() instanceof TrieIntField) {
            newVal = val.intValue();
          } else if (schemaField.getType() instanceof TrieFloatField) {
            newVal = Float.intBitsToFloat(val.intValue());
          } else if (schemaField.getType() instanceof TrieDoubleField) {
            newVal = Double.longBitsToDouble(val);
          } else if (schemaField.getType() instanceof TrieDateField) {
            newVal = new Date(val);
          } else if (schemaField.getType() instanceof EnumField) {
            newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
          }
        }
        doc.addField(fieldName, newVal);
        break;
      case BINARY:
        BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
        if (bdv == null) {
          continue;
        }
        BytesRef value;
        if (bdv.advanceExact(localId)) {
          value = BytesRef.deepCopyOf(bdv.binaryValue());
        } else {
          continue;
        }
        doc.addField(fieldName, value);
        break;
      case SORTED:
        SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
        if (sdv == null) {
          continue;
        }
        if (sdv.advanceExact(localId)) {
          final BytesRef bRef = sdv.binaryValue();
          // Special handling for Boolean fields since they're stored as 'T' and 'F'.
          if (schemaField.getType() instanceof BoolField) {
            doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
          } else {
            doc.addField(fieldName, bRef.utf8ToString());
          }
        }
        break;
      case SORTED_NUMERIC:
        final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
        NumberType type = schemaField.getType().getNumberType();
        if (numericDv != null) {
          if (numericDv.advance(localId) == localId) {
            final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
            for (int i = 0; i < numericDv.docValueCount(); i++) {
              long number = numericDv.nextValue();
              switch (type) {
                case INTEGER:
                  outValues.add((int) number);
                  break;
                case LONG:
                  outValues.add(number);
                  break;
                case FLOAT:
                  outValues.add(NumericUtils.sortableIntToFloat((int) number));
                  break;
                case DOUBLE:
                  outValues.add(NumericUtils.sortableLongToDouble(number));
                  break;
                case DATE:
                  outValues.add(new Date(number));
                  break;
                default:
                  throw new AssertionError("Unexpected PointType: " + type);
              }
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        break;
      case SORTED_SET:
        final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
        if (values != null && values.getValueCount() > 0) {
          if (values.advance(localId) == localId) {
            final List<Object> outValues = new LinkedList<>();
            for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
              value = values.lookupOrd(ord);
              outValues.add(schemaField.getType().toObject(schemaField, value));
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        break;
      case NONE:
        break;
    }
  }
}
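A hypothetical usage sketch from inside a search component, with field names invented for illustration: the fetcher is obtained from the searcher and asked to decorate one hit with its docValues-only fields.

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.search.SolrIndexSearcher;

// Hypothetical sketch; "price_d" and "inStock_b" are assumed field names.
static SolrDocument fetchDocValues(SolrIndexSearcher searcher, int docid) throws IOException {
  SolrDocument out = new SolrDocument();
  Set<String> dvFields = new HashSet<>(Arrays.asList("price_d", "inStock_b"));
  searcher.getDocFetcher().decorateDocValueFields(out, docid, dvFields);
  return out;
}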