use of org.opensearch.index.fieldvisitor.FieldsVisitor in project OpenSearch by opensearch-project.
the class LuceneChangesSnapshot method readDocAsOp.
private Translog.Operation readDocAsOp(int docIndex) throws IOException {
final LeafReaderContext leaf = parallelArray.leafReaderContexts[docIndex];
final int segmentDocID = scoreDocs[docIndex].doc - leaf.docBase;
final long primaryTerm = parallelArray.primaryTerm[docIndex];
assert primaryTerm > 0 : "nested child document must be excluded";
final long seqNo = parallelArray.seqNo[docIndex];
// Only pick the first seen seq#
if (seqNo == lastSeenSeqNo) {
skippedOperations++;
return null;
}
final long version = parallelArray.version[docIndex];
final String sourceField = parallelArray.hasRecoverySource[docIndex] ? SourceFieldMapper.RECOVERY_SOURCE_NAME : SourceFieldMapper.NAME;
final FieldsVisitor fields = new FieldsVisitor(true, sourceField);
leaf.reader().document(segmentDocID, fields);
fields.postProcess(mapperService);
final Translog.Operation op;
final boolean isTombstone = parallelArray.isTombStone[docIndex];
if (isTombstone && fields.uid() == null) {
op = new Translog.NoOp(seqNo, primaryTerm, fields.source().utf8ToString());
assert version == 1L : "Noop tombstone should have version 1L; actual version [" + version + "]";
assert assertDocSoftDeleted(leaf.reader(), segmentDocID) : "Noop but soft_deletes field is not set [" + op + "]";
} else {
final String id = fields.uid().id();
final String type = fields.uid().type();
final Term uid = new Term(IdFieldMapper.NAME, Uid.encodeId(id));
if (isTombstone) {
op = new Translog.Delete(type, id, uid, seqNo, primaryTerm, version);
assert assertDocSoftDeleted(leaf.reader(), segmentDocID) : "Delete op but soft_deletes field is not set [" + op + "]";
} else {
final BytesReference source = fields.source();
if (source == null) {
// check for the existence of the source once we make peer-recovery send ops after the local checkpoint.
if (requiredFullRange) {
throw new MissingHistoryOperationsException("source not found for seqno=" + seqNo + " from_seqno=" + fromSeqNo + " to_seqno=" + toSeqNo);
} else {
skippedOperations++;
return null;
}
}
// TODO: pass the latest timestamp from engine.
final long autoGeneratedIdTimestamp = -1;
op = new Translog.Index(type, id, seqNo, primaryTerm, version, source.toBytesRef().bytes, fields.routing(), autoGeneratedIdTimestamp);
}
}
assert fromSeqNo <= op.seqNo() && op.seqNo() <= toSeqNo && lastSeenSeqNo < op.seqNo() : "Unexpected operation; " + "last_seen_seqno [" + lastSeenSeqNo + "], from_seqno [" + fromSeqNo + "], to_seqno [" + toSeqNo + "], op [" + op + "]";
return op;
}
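The pattern above is: build a FieldsVisitor for the stored source field, let the LeafReader fill it for one segment-local doc ID, post-process it against the mappings, then read uid, source and routing from it. Below is a minimal, self-contained sketch of just that load-and-read step; the helper class is hypothetical and import packages (e.g. BytesReference) may differ between OpenSearch versions.
import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.opensearch.common.bytes.BytesReference;
import org.opensearch.index.fieldvisitor.FieldsVisitor;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.SourceFieldMapper;

// Hypothetical helper, not part of OpenSearch: load one stored document and read its uid/source/routing.
final class StoredDocReaderSketch {
    static void readStoredDoc(LeafReader reader, MapperService mapperService, int segmentDocID, boolean useRecoverySource) throws IOException {
        // Choose the stored field that carries the source, as readDocAsOp does above.
        String sourceField = useRecoverySource ? SourceFieldMapper.RECOVERY_SOURCE_NAME : SourceFieldMapper.NAME;
        FieldsVisitor fields = new FieldsVisitor(true, sourceField);
        // The visitor collects the requested stored fields while the reader visits the document.
        reader.document(segmentDocID, fields);
        // Resolve the collected values against the mappings (e.g. decode _id).
        fields.postProcess(mapperService);
        BytesReference source = fields.source(); // null when no source is stored for this doc
        String id = fields.uid() != null ? fields.uid().id() : null;
        String routing = fields.routing();
        System.out.println("id=" + id + ", routing=" + routing + ", hasSource=" + (source != null));
    }
}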
use of org.opensearch.index.fieldvisitor.FieldsVisitor in project OpenSearch by opensearch-project.
the class FetchPhase method prepareNestedHitContext.
/**
* Resets the provided {@link HitContext} with information on the current
* nested document. This includes the following:
* - Adding an initial {@link SearchHit} instance.
* - Loading the document source, filtering it based on the nested document ID, then
* setting it on {@link SourceLookup}. This allows fetch subphases that use the hit
* context to access the preloaded source.
*/
@SuppressWarnings("unchecked")
private HitContext prepareNestedHitContext(SearchContext context, int nestedTopDocId, int rootDocId, Map<String, Set<String>> storedToRequestedFields, LeafReaderContext subReaderContext, CheckedBiConsumer<Integer, FieldsVisitor, IOException> storedFieldReader) throws IOException {
// Also if highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
boolean needSource = sourceRequired(context) || context.highlight() != null;
Uid rootId;
Map<String, Object> rootSourceAsMap = null;
XContentType rootSourceContentType = null;
int nestedDocId = nestedTopDocId - subReaderContext.docBase;
if (context instanceof InnerHitsContext.InnerHitSubContext) {
InnerHitsContext.InnerHitSubContext innerHitsContext = (InnerHitsContext.InnerHitSubContext) context;
rootId = innerHitsContext.getRootId();
if (needSource) {
SourceLookup rootLookup = innerHitsContext.getRootLookup();
rootSourceAsMap = rootLookup.loadSourceIfNeeded();
rootSourceContentType = rootLookup.sourceContentType();
}
} else {
FieldsVisitor rootFieldsVisitor = new FieldsVisitor(needSource);
loadStoredFields(context.mapperService(), storedFieldReader, rootFieldsVisitor, rootDocId);
rootFieldsVisitor.postProcess(context.mapperService());
rootId = rootFieldsVisitor.uid();
if (needSource) {
if (rootFieldsVisitor.source() != null) {
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(rootFieldsVisitor.source(), false);
rootSourceAsMap = tuple.v2();
rootSourceContentType = tuple.v1();
} else {
rootSourceAsMap = Collections.emptyMap();
}
}
}
Map<String, DocumentField> docFields = emptyMap();
Map<String, DocumentField> metaFields = emptyMap();
if (context.hasStoredFields() && !context.storedFieldsContext().fieldNames().isEmpty()) {
FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(storedToRequestedFields.keySet(), false);
loadStoredFields(context.mapperService(), storedFieldReader, nestedFieldsVisitor, nestedDocId);
if (nestedFieldsVisitor.fields().isEmpty() == false) {
docFields = new HashMap<>();
metaFields = new HashMap<>();
fillDocAndMetaFields(context, nestedFieldsVisitor, storedToRequestedFields, docFields, metaFields);
}
}
DocumentMapper documentMapper = context.mapperService().documentMapper();
ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedDocId, context, subReaderContext);
assert nestedObjectMapper != null;
SearchHit.NestedIdentity nestedIdentity = getInternalNestedIdentity(context, nestedDocId, subReaderContext, context.mapperService(), nestedObjectMapper);
SearchHit hit = new SearchHit(nestedTopDocId, rootId.id(), nestedIdentity, docFields, metaFields);
// Use a clean, fresh SourceLookup
HitContext hitContext = new HitContext(hit, subReaderContext, nestedDocId, new SourceLookup());
if (rootSourceAsMap != null && rootSourceAsMap.isEmpty() == false) {
// Isolate the nested json array object that matches with the nested hit and wrap it back into the same json
// structure, with the nested json array object being the actual content. The latter is important so that
// features like source filtering and highlighting work consistently, regardless of whether the nested field
// points to a single json object or to an array of json objects in the _source.
Map<String, Object> nestedSourceAsMap = new HashMap<>();
Map<String, Object> current = nestedSourceAsMap;
for (SearchHit.NestedIdentity nested = nestedIdentity; nested != null; nested = nested.getChild()) {
String nestedPath = nested.getField().string();
current.put(nestedPath, new HashMap<>());
Object extractedValue = XContentMapValues.extractValue(nestedPath, rootSourceAsMap);
List<?> nestedParsedSource;
if (extractedValue instanceof List) {
// nested field has an array value in the _source
nestedParsedSource = (List<?>) extractedValue;
} else if (extractedValue instanceof Map) {
// nested field has an object value in the _source. This means the nested field has just one inner object,
// which is valid, but uncommon.
nestedParsedSource = Collections.singletonList(extractedValue);
} else {
throw new IllegalStateException("extracted source isn't an object or an array");
}
if ((nestedParsedSource.get(0) instanceof Map) == false && nestedObjectMapper.parentObjectMapperAreNested(context.mapperService()) == false) {
// When one of the parent object fields is not nested, XContentMapValues.extractValue(...) extracts values across two
// or more object layers and returns a list of lists, which is why only the first element of nestedParsedSource needs to be checked.
throw new IllegalArgumentException("Cannot execute inner hits. One or more parent object fields of nested field [" + nestedObjectMapper.name() + "] are not nested. All parent fields need to be nested fields too");
}
rootSourceAsMap = (Map<String, Object>) nestedParsedSource.get(nested.getOffset());
if (nested.getChild() == null) {
current.put(nestedPath, rootSourceAsMap);
} else {
Map<String, Object> next = new HashMap<>();
current.put(nestedPath, next);
current = next;
}
}
hitContext.sourceLookup().setSource(nestedSourceAsMap);
hitContext.sourceLookup().setSourceContentType(rootSourceContentType);
}
return hitContext;
}
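The interesting part of this method is the nested-source isolation: at each level of the NestedIdentity chain it extracts the array element matching the hit's offset from the root _source and wraps it back under the same field name. The following is a toy illustration of that wrap-back idea using plain Java maps; it is not OpenSearch code, and the real method additionally uses XContentMapValues.extractValue, handles single-object values, and walks multi-level chains.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative only: given a root _source like {"comments": [{...}, {...}]}, isolate the element
// at `offset` but keep the surrounding json structure, as prepareNestedHitContext does per level.
final class NestedSourceExample {
    @SuppressWarnings("unchecked")
    static Map<String, Object> isolate(Map<String, Object> rootSource, String nestedPath, int offset) {
        Object extracted = rootSource.get(nestedPath);            // the real code uses XContentMapValues.extractValue
        List<?> values = (List<?>) extracted;                     // assume an array value for brevity
        Map<String, Object> matching = (Map<String, Object>) values.get(offset);
        Map<String, Object> wrapped = new HashMap<>();
        wrapped.put(nestedPath, matching);                        // wrap the single hit back under the same field name
        return wrapped;
    }

    public static void main(String[] args) {
        Map<String, Object> root = new HashMap<>();
        root.put("comments", List.of(Map.of("author", "a"), Map.of("author", "b")));
        // prints {comments={author=b}} for the nested hit at offset 1
        System.out.println(isolate(root, "comments", 1));
    }
}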
use of org.opensearch.index.fieldvisitor.FieldsVisitor in project OpenSearch by opensearch-project.
the class FetchPhase method execute.
public void execute(SearchContext context) {
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("{}", new SearchContextSourcePrinter(context));
}
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
}
if (context.docIdsToLoadSize() == 0) {
// no individual hits to process, so we shortcut
context.fetchResult().hits(new SearchHits(new SearchHit[0], context.queryResult().getTotalHits(), context.queryResult().getMaxScore()));
return;
}
DocIdToIndex[] docs = new DocIdToIndex[context.docIdsToLoadSize()];
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
docs[index] = new DocIdToIndex(context.docIdsToLoad()[context.docIdsToLoadFrom() + index], index);
}
// make sure that we iterate in doc id order
Arrays.sort(docs);
Map<String, Set<String>> storedToRequestedFields = new HashMap<>();
FieldsVisitor fieldsVisitor = createStoredFieldsVisitor(context, storedToRequestedFields);
FetchContext fetchContext = new FetchContext(context);
SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
List<FetchSubPhaseProcessor> processors = getProcessors(context.shardTarget(), fetchContext);
int currentReaderIndex = -1;
LeafReaderContext currentReaderContext = null;
CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader = null;
boolean hasSequentialDocs = hasSequentialDocs(docs);
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
}
int docId = docs[index].docId;
try {
int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
if (currentReaderIndex != readerIndex) {
currentReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
currentReaderIndex = readerIndex;
if (currentReaderContext.reader() instanceof SequentialStoredFieldsLeafReader && hasSequentialDocs && docs.length >= 10) {
// All the docs to fetch are adjacent but Lucene stored fields are optimized
// for random access and don't optimize for sequential access - except for merging.
// So we do a little hack here and pretend we're going to do merges in order to
// get better sequential access.
SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) currentReaderContext.reader();
fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
} else {
fieldReader = currentReaderContext.reader()::document;
}
for (FetchSubPhaseProcessor processor : processors) {
processor.setNextReader(currentReaderContext);
}
}
assert currentReaderContext != null;
HitContext hit = prepareHitContext(context, fetchContext.searchLookup(), fieldsVisitor, docId, storedToRequestedFields, currentReaderContext, fieldReader);
for (FetchSubPhaseProcessor processor : processors) {
processor.process(hit);
}
hits[docs[index].index] = hit.hit();
} catch (Exception e) {
throw new FetchPhaseExecutionException(context.shardTarget(), "Error running fetch phase for doc [" + docId + "]", e);
}
}
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled task with reason: " + context.getTask().getReasonCancelled());
}
TotalHits totalHits = context.queryResult().getTotalHits();
context.fetchResult().hits(new SearchHits(hits, totalHits, context.queryResult().getMaxScore()));
}
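The sequential-reader branch above is gated by hasSequentialDocs, which is not shown in this snippet. Since docs has already been sorted by doc ID, adjacency can be detected by comparing the span of IDs to the array length. A plausible reconstruction follows; the DocIdToIndex stand-in and its field name are assumptions for illustration.
// Plausible reconstruction of the adjacency check that gates the sequential stored-fields reader.
final class SequentialDocsCheck {
    // Minimal stand-in for FetchPhase.DocIdToIndex (field name assumed).
    static final class DocIdToIndex {
        final int docId;
        DocIdToIndex(int docId) { this.docId = docId; }
    }

    // After sorting by doc ID, the IDs are consecutive exactly when last - first == length - 1.
    static boolean hasSequentialDocs(DocIdToIndex[] docs) {
        return docs.length > 0 && docs[docs.length - 1].docId - docs[0].docId == docs.length - 1;
    }
}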
use of org.opensearch.index.fieldvisitor.FieldsVisitor in project OpenSearch by opensearch-project.
the class ShardGetService method innerGetLoadFromStoredFields.
private GetResult innerGetLoadFromStoredFields(String id, String[] storedFields, FetchSourceContext fetchSourceContext, Engine.GetResult get, MapperService mapperService) {
assert get.exists() : "method should only be called if document could be retrieved";
// check first if stored fields to be loaded don't contain an object field
DocumentMapper docMapper = mapperService.documentMapper();
if (storedFields != null) {
for (String field : storedFields) {
Mapper fieldMapper = docMapper.mappers().getMapper(field);
if (fieldMapper == null) {
if (docMapper.objectMappers().get(field) != null) {
// Only fail if we know it is an object field, missing paths / fields shouldn't fail.
throw new IllegalArgumentException("field [" + field + "] isn't a leaf field");
}
}
}
}
Map<String, DocumentField> documentFields = null;
Map<String, DocumentField> metadataFields = null;
BytesReference source = null;
DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
// force fetching source if we read from translog and need to recreate stored fields
boolean forceSourceForComputingTranslogStoredFields = get.isFromTranslog() && storedFields != null && Stream.of(storedFields).anyMatch(f -> TranslogLeafReader.ALL_FIELD_NAMES.contains(f) == false);
FieldsVisitor fieldVisitor = buildFieldsVisitors(storedFields, forceSourceForComputingTranslogStoredFields ? FetchSourceContext.FETCH_SOURCE : fetchSourceContext);
if (fieldVisitor != null) {
try {
docIdAndVersion.reader.document(docIdAndVersion.docId, fieldVisitor);
} catch (IOException e) {
throw new OpenSearchException("Failed to get id [" + id + "]", e);
}
source = fieldVisitor.source();
// in case we read from translog, some extra steps are needed to make _source consistent and to load stored fields
if (get.isFromTranslog()) {
// just make source consistent by reapplying source filters from mapping (possibly also nulling the source)
if (forceSourceForComputingTranslogStoredFields == false) {
try {
source = indexShard.mapperService().documentMapper().sourceMapper().applyFilters(source, null);
} catch (IOException e) {
throw new OpenSearchException("Failed to reapply filters for [" + id + "] after reading from translog", e);
}
} else {
// Slow path: recreate stored fields from original source
assert source != null : "original source in translog must exist";
SourceToParse sourceToParse = new SourceToParse(shardId.getIndexName(), MapperService.SINGLE_MAPPING_NAME, id, source, XContentHelper.xContentType(source), fieldVisitor.routing());
ParsedDocument doc = indexShard.mapperService().documentMapper().parse(sourceToParse);
assert doc.dynamicMappingsUpdate() == null : "mapping updates should not be required on already-indexed doc";
// update special fields
doc.updateSeqID(docIdAndVersion.seqNo, docIdAndVersion.primaryTerm);
doc.version().setLongValue(docIdAndVersion.version);
// retrieve stored fields from parsed doc
fieldVisitor = buildFieldsVisitors(storedFields, fetchSourceContext);
for (IndexableField indexableField : doc.rootDoc().getFields()) {
IndexableFieldType fieldType = indexableField.fieldType();
if (fieldType.stored()) {
FieldInfo fieldInfo = new FieldInfo(indexableField.name(), 0, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
StoredFieldVisitor.Status status = fieldVisitor.needsField(fieldInfo);
if (status == StoredFieldVisitor.Status.YES) {
if (indexableField.numericValue() != null) {
fieldVisitor.objectField(fieldInfo, indexableField.numericValue());
} else if (indexableField.binaryValue() != null) {
fieldVisitor.binaryField(fieldInfo, indexableField.binaryValue());
} else if (indexableField.stringValue() != null) {
fieldVisitor.objectField(fieldInfo, indexableField.stringValue());
}
} else if (status == StoredFieldVisitor.Status.STOP) {
break;
}
}
}
// retrieve source (with possible transformations, e.g. source filters)
source = fieldVisitor.source();
}
}
// put stored fields into result objects
if (!fieldVisitor.fields().isEmpty()) {
fieldVisitor.postProcess(mapperService);
documentFields = new HashMap<>();
metadataFields = new HashMap<>();
for (Map.Entry<String, List<Object>> entry : fieldVisitor.fields().entrySet()) {
if (mapperService.isMetadataField(entry.getKey())) {
metadataFields.put(entry.getKey(), new DocumentField(entry.getKey(), entry.getValue()));
} else {
documentFields.put(entry.getKey(), new DocumentField(entry.getKey(), entry.getValue()));
}
}
}
}
if (source != null) {
// apply request-level source filtering
if (fetchSourceContext.fetchSource() == false) {
source = null;
} else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
Map<String, Object> sourceAsMap;
// TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
// Do we care?
Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
XContentType sourceContentType = typeMapTuple.v1();
sourceAsMap = typeMapTuple.v2();
sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
try {
source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
} catch (IOException e) {
throw new OpenSearchException("Failed to get id [" + id + "] with includes/excludes set", e);
}
}
}
return new GetResult(shardId.getIndexName(), id, get.docIdAndVersion().seqNo, get.docIdAndVersion().primaryTerm, get.version(), get.exists(), source, documentFields, metadataFields);
}
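The includes/excludes handling above is a three-step sequence over the stored _source: convert to a map, filter, and rebuild the bytes in the original content type. A small sketch of just that step, using the same helpers the method calls; the class name is illustrative and package locations vary across OpenSearch versions.
import java.io.IOException;
import java.util.Map;

import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.collect.Tuple;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.common.xcontent.support.XContentMapValues;

// Illustrative helper: apply includes/excludes filtering to a stored _source using the
// convert -> filter -> rebuild sequence shown in innerGetLoadFromStoredFields above.
final class SourceFilterExample {
    static BytesReference filterSource(BytesReference source, String[] includes, String[] excludes) throws IOException {
        Tuple<XContentType, Map<String, Object>> typeAndMap = XContentHelper.convertToMap(source, true);
        Map<String, Object> filtered = XContentMapValues.filter(typeAndMap.v2(), includes, excludes);
        // Re-serialize the filtered map in the same content type as the original source.
        return BytesReference.bytes(XContentFactory.contentBuilder(typeAndMap.v1()).map(filtered));
    }
}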
use of org.opensearch.index.fieldvisitor.FieldsVisitor in project OpenSearch by opensearch-project.
the class SourceLookup method loadSourceIfNeeded.
// Scripting requires this method to be public. Using source()
// is not possible because certain checks use source == null as
// a determination of whether source is enabled/disabled, but it should
// never be a null Map for scripting even when disabled.
public Map<String, Object> loadSourceIfNeeded() {
if (source != null) {
return source;
}
if (sourceAsBytes != null) {
Tuple<XContentType, Map<String, Object>> tuple = sourceAsMapAndType(sourceAsBytes);
sourceContentType = tuple.v1();
source = tuple.v2();
return source;
}
try {
FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
fieldReader.accept(docId, sourceFieldVisitor);
BytesReference source = sourceFieldVisitor.source();
if (source == null) {
this.source = emptyMap();
this.sourceContentType = null;
} else {
Tuple<XContentType, Map<String, Object>> tuple = sourceAsMapAndType(source);
this.sourceContentType = tuple.v1();
this.source = tuple.v2();
}
} catch (Exception e) {
throw new OpenSearchParseException("failed to parse / load source", e);
}
return this.source;
}
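The fieldReader consumed here is simply "read the stored fields of this segment-local doc ID into the given FieldsVisitor", which is why FetchPhase can pass either LeafReader::document or a sequential stored-fields reader. A minimal sketch of wiring such a reader; the wrapper class is illustrative.
import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.common.CheckedBiConsumer;
import org.opensearch.index.fieldvisitor.FieldsVisitor;

// Illustrative only: adapt a leaf reader into the fieldReader shape SourceLookup expects.
final class FieldReaderWiring {
    static CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReaderFor(LeafReaderContext leafContext) {
        // LeafReader.document(int, StoredFieldVisitor) matches the consumer's shape directly.
        return leafContext.reader()::document;
    }
}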