use of org.apache.lucene.index.StoredFieldVisitor in project elasticsearch by elastic.
the class SimpleLuceneTests method testOrdering.
/**
* Here, we verify that the order that we add fields to a document counts, and not the lexi order
* of the field. This means that heavily accessed fields that use field selector should be added
* first (with load and break).
*/
public void testOrdering() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document document = new Document();
document.add(new TextField("_id", "1", Field.Store.YES));
document.add(new TextField("#id", "1", Field.Store.YES));
indexWriter.addDocument(document);
IndexReader reader = DirectoryReader.open(indexWriter);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
final ArrayList<String> fieldsOrder = new ArrayList<>();
searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
fieldsOrder.add(fieldInfo.name);
return Status.YES;
}
});
assertThat(fieldsOrder.size(), equalTo(2));
assertThat(fieldsOrder.get(0), equalTo("_id"));
assertThat(fieldsOrder.get(1), equalTo("#id"));
indexWriter.close();
}
use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.
the class SolrDocumentFetcher method visitFromCached.
/** Executes a stored field visitor against a hit from the document cache */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
for (IndexableField f : document) {
final FieldInfo info = searcher.getFieldInfos().fieldInfo(f.name());
final StoredFieldVisitor.Status needsField = visitor.needsField(info);
if (needsField == StoredFieldVisitor.Status.STOP)
return;
if (needsField == StoredFieldVisitor.Status.NO)
continue;
BytesRef binaryValue = f.binaryValue();
if (binaryValue != null) {
visitor.binaryField(info, toByteArrayUnwrapIfPossible(binaryValue));
continue;
}
Number numericValue = f.numericValue();
if (numericValue != null) {
if (numericValue instanceof Double) {
visitor.doubleField(info, numericValue.doubleValue());
} else if (numericValue instanceof Integer) {
visitor.intField(info, numericValue.intValue());
} else if (numericValue instanceof Float) {
visitor.floatField(info, numericValue.floatValue());
} else if (numericValue instanceof Long) {
visitor.longField(info, numericValue.longValue());
} else {
throw new AssertionError();
}
continue;
}
// must be String
if (f instanceof LargeLazyField) {
// optimization to avoid premature string conversion
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
} else {
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
}
}
}
use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.
the class DocNameExtractor method docName.
/**
* Extract the name of the input doc from the index.
* @param searcher access to the index.
* @param docid ID of doc whose name is needed.
* @return the name of the input doc as extracted from the index.
* @throws IOException if cannot extract the doc name from the index.
*/
public String docName(IndexSearcher searcher, int docid) throws IOException {
final List<String> name = new ArrayList<>();
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
String value = new String(bytes, StandardCharsets.UTF_8);
name.add(value);
}
@Override
public Status needsField(FieldInfo fieldInfo) {
if (!name.isEmpty()) {
return Status.STOP;
} else if (fieldInfo.name.equals(docNameField)) {
return Status.YES;
} else {
return Status.NO;
}
}
});
if (name.size() != 0) {
return name.get(0);
} else {
return null;
}
}
use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.
the class FieldOptions method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
NamedList<Object> termVectors = new NamedList<>();
rb.rsp.add(TERM_VECTORS, termVectors);
IndexSchema schema = rb.req.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
String uniqFieldName = null;
if (keyField != null) {
uniqFieldName = keyField.getName();
}
FieldOptions allFields = new FieldOptions();
//figure out what options we have, and try to get the appropriate vector
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
//short cut to all values.
if (params.getBool(TermVectorParams.ALL, false)) {
allFields.termFreq = true;
allFields.positions = true;
allFields.offsets = true;
allFields.payloads = true;
allFields.docFreq = true;
allFields.tfIdf = true;
}
//Build up our per field mapping
Map<String, FieldOptions> fieldOptions = new HashMap<>();
NamedList<List<String>> warnings = new NamedList<>();
List<String> noTV = new ArrayList<>();
List<String> noPos = new ArrayList<>();
List<String> noOff = new ArrayList<>();
List<String> noPay = new ArrayList<>();
Set<String> fields = getFields(rb);
if (null != fields) {
//we have specific fields to retrieve, or no fields
for (String field : fields) {
// workaround SOLR-3523
if (null == field || "score".equals(field))
continue;
// we don't want to issue warnings about the uniqueKey field
// since it can cause lots of confusion in distributed requests
// where the uniqueKey field is injected into the fl for merging
final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
if (sf.storeTermVector()) {
FieldOptions option = fieldOptions.get(field);
if (option == null) {
option = new FieldOptions();
option.fieldName = field;
fieldOptions.put(field, option);
}
//get the per field mappings
option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
//Validate these are even an option
option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
noPos.add(field);
}
option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
noOff.add(field);
}
option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey) {
noPay.add(field);
}
} else {
//field doesn't have term vectors
if (!fieldIsUniqueKey)
noTV.add(field);
}
} else {
//field doesn't exist
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
}
}
}
// to be changed to account for that.
if (!noTV.isEmpty()) {
warnings.add("noTermVectors", noTV);
}
if (!noPos.isEmpty()) {
warnings.add("noPositions", noPos);
}
if (!noOff.isEmpty()) {
warnings.add("noOffsets", noOff);
}
if (!noPay.isEmpty()) {
warnings.add("noPayloads", noPay);
}
if (warnings.size() > 0) {
termVectors.add(TV_KEY_WARNINGS, warnings);
}
DocListAndSet listAndSet = rb.getResults();
List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
Iterator<Integer> iter;
if (docIds != null && !docIds.isEmpty()) {
iter = docIds.iterator();
} else {
DocList list = listAndSet.docList;
iter = list.iterator();
}
SolrIndexSearcher searcher = rb.req.getSearcher();
IndexReader reader = searcher.getIndexReader();
//the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
//Only load the id field to get the uniqueKey of that
//field
final String finalUniqFieldName = uniqFieldName;
final List<String> uniqValues = new ArrayList<>();
// TODO: is this required to be single-valued? if so, we should STOP
// once we find it...
final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
}
@Override
public void intField(FieldInfo fieldInfo, int value) {
uniqValues.add(Integer.toString(value));
}
@Override
public void longField(FieldInfo fieldInfo, long value) {
uniqValues.add(Long.toString(value));
}
@Override
public Status needsField(FieldInfo fieldInfo) {
return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
}
};
while (iter.hasNext()) {
Integer docId = iter.next();
NamedList<Object> docNL = new NamedList<>();
if (keyField != null) {
reader.document(docId, getUniqValue);
String uniqVal = null;
if (uniqValues.size() != 0) {
uniqVal = uniqValues.get(0);
uniqValues.clear();
docNL.add("uniqueKey", uniqVal);
termVectors.add(uniqVal, docNL);
}
} else {
// support for schemas w/o a unique key,
termVectors.add("doc-" + docId, docNL);
}
if (null != fields) {
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
final String field = entry.getKey();
final Terms vector = reader.getTermVector(docId, field);
if (vector != null) {
TermsEnum termsEnum = vector.iterator();
mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
}
}
} else {
// extract all fields
final Fields vectors = reader.getTermVectors(docId);
for (String field : vectors) {
Terms terms = vectors.terms(field);
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
}
}
}
}
}
use of org.apache.lucene.index.StoredFieldVisitor in project lucene-solr by apache.
the class BaseFragmentsBuilder method getFields.
protected Field[] getFields(IndexReader reader, int docId, final String fieldName) throws IOException {
// according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
final List<Field> fields = new ArrayList<>();
reader.document(docId, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
String value = new String(bytes, StandardCharsets.UTF_8);
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft));
}
@Override
public Status needsField(FieldInfo fieldInfo) {
return fieldInfo.name.equals(fieldName) ? Status.YES : Status.NO;
}
});
return fields.toArray(new Field[fields.size()]);
}
Aggregations