Search in sources:

Example 96 with ParsedDocument

use of org.opensearch.index.mapper.ParsedDocument in project OpenSearch by opensearch-project.

From the class InternalEngine, the method deleteInLucene.

/**
 * Applies a delete operation to Lucene by writing a tombstone document.
 *
 * <p>The tombstone carries the operation's sequence number, primary term and
 * version. For stale operations, or when the document is already deleted, the
 * tombstone is appended as a fresh document; otherwise a soft-update replaces
 * the live document under the same uid.
 *
 * @param delete the delete operation to apply
 * @param plan   the resolved deletion strategy (version, staleness, current state)
 * @return the result of the deletion, reporting "found" when the doc was live
 * @throws IOException if Lucene fails while writing the tombstone
 */
private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws IOException {
    assert assertMaxSeqNoOfUpdatesIsAdvanced(delete.uid(), delete.seqNo(), false, false);
    try {
        final ParsedDocument deleteTombstone = engineConfig.getTombstoneDocSupplier().newDeleteTombstoneDoc(delete.type(), delete.id());
        assert deleteTombstone.docs().size() == 1 : "Tombstone doc should have single doc [" + deleteTombstone + "]";
        deleteTombstone.updateSeqID(delete.seqNo(), delete.primaryTerm());
        deleteTombstone.version().setLongValue(plan.versionOfDeletion);
        final ParseContext.Document tombstoneDoc = deleteTombstone.docs().get(0);
        assert tombstoneDoc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null : "Delete tombstone document but _tombstone field is not set [" + tombstoneDoc + " ]";
        tombstoneDoc.add(softDeletesField);
        // A live document must be soft-updated in place; stale or already-deleted
        // operations only need the tombstone appended.
        if (plan.addStaleOpToLucene == false && plan.currentlyDeleted == false) {
            indexWriter.softUpdateDocument(delete.uid(), tombstoneDoc, softDeletesField);
        } else {
            indexWriter.addDocument(tombstoneDoc);
        }
        return new DeleteResult(plan.versionOfDeletion, delete.primaryTerm(), delete.seqNo(), plan.currentlyDeleted == false);
    } catch (final Exception ex) {
        /*
             * Document level failures when deleting are unexpected, we likely hit something fatal such as the Lucene index being corrupt,
             * or the Lucene document limit. We have already issued a sequence number here so this is fatal, fail the engine.
             */
        if (!(ex instanceof AlreadyClosedException) && indexWriter.getTragicException() == null) {
            final String reason = String.format(Locale.ROOT, "delete id[%s] origin [%s] seq#[%d] failed at the document level", delete.id(), delete.origin(), delete.seqNo());
            failEngine(reason, ex);
        }
        throw ex;
    }
}
Also used : ParsedDocument(org.opensearch.index.mapper.ParsedDocument) ParseContext(org.opensearch.index.mapper.ParseContext) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) IOException(java.io.IOException) TranslogCorruptedException(org.opensearch.index.translog.TranslogCorruptedException)

Example 97 with ParsedDocument

use of org.opensearch.index.mapper.ParsedDocument in project OpenSearch by opensearch-project.

From the class TermVectorsService, the method generateTermVectorsFromDoc.

/**
 * Generates term vectors for an artificial (request-supplied) document.
 *
 * <p>The raw document is parsed through the shard's mappers, then every stored
 * leaf field that passes the mapping and {@code selectedFields} filters is
 * collected (first occurrence only) and handed to
 * {@code generateTermVectors} together with the re-parsed source map.
 *
 * @param indexShard shard whose mapper service and analyzers are used
 * @param request    the term-vectors request carrying the artificial doc
 * @return the term vectors computed for the selected fields
 * @throws IOException if term-vector generation fails
 */
private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVectorsRequest request) throws IOException {
    // parse the document, at the moment we do update the mapping, just like percolate
    ParsedDocument parsedDocument = parseDocument(indexShard, indexShard.shardId().getIndexName(), request.doc(), request.xContentType(), request.routing());
    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Set<String> seenFields = new HashSet<>();
    Collection<DocumentField> documentFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
        MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name());
        if (!isValidField(fieldType)) {
            continue;
        }
        if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
            continue;
        }
        // Set.add returns false for duplicates, so each field name is
        // processed exactly once without a separate contains() lookup.
        if (seenFields.add(field.name()) == false) {
            continue;
        }
        String[] values = getValues(doc.getFields(field.name()));
        documentFields.add(new DocumentField(field.name(), Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(indexShard, XContentHelper.convertToMap(parsedDocument.source(), true, request.xContentType()).v2(), documentFields, request.offsets(), request.perFieldAnalyzer(), seenFields);
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) ParsedDocument(org.opensearch.index.mapper.ParsedDocument) DocumentField(org.opensearch.common.document.DocumentField) ParseContext(org.opensearch.index.mapper.ParseContext) MappedFieldType(org.opensearch.index.mapper.MappedFieldType) HashSet(java.util.HashSet)

Example 98 with ParsedDocument

use of org.opensearch.index.mapper.ParsedDocument in project OpenSearch by opensearch-project.

From the class TermVectorsService, the method parseDocument.

/**
 * Parses a raw document through the shard's mapping, using a placeholder id,
 * and attaches any dynamic-mapping update produced by auto-creation.
 *
 * @param indexShard   shard providing the mapper service
 * @param index        index name to record in the source
 * @param doc          the raw document bytes
 * @param xContentType content type of {@code doc}
 * @param routing      routing value, may be null
 * @return the parsed document, with dynamic mapping updates attached if any
 */
private static ParsedDocument parseDocument(IndexShard indexShard, String index, BytesReference doc, XContentType xContentType, String routing) {
    final DocumentMapperForType mapperForType = indexShard.mapperService().documentMapperWithAutoCreate(MapperService.SINGLE_MAPPING_NAME);
    final SourceToParse sourceToParse = new SourceToParse(index, MapperService.SINGLE_MAPPING_NAME, "_id_for_tv_api", doc, xContentType, routing);
    final ParsedDocument parsed = mapperForType.getDocumentMapper().parse(sourceToParse);
    if (mapperForType.getMapping() != null) {
        parsed.addDynamicMappingsUpdate(mapperForType.getMapping());
    }
    return parsed;
}
Also used : ParsedDocument(org.opensearch.index.mapper.ParsedDocument) DocumentMapperForType(org.opensearch.index.mapper.DocumentMapperForType) SourceToParse(org.opensearch.index.mapper.SourceToParse) MapperService(org.opensearch.index.mapper.MapperService)

Example 99 with ParsedDocument

use of org.opensearch.index.mapper.ParsedDocument in project OpenSearch by opensearch-project.

From the class ShardGetService, the method innerGetLoadFromStoredFields.

/**
 * Builds a {@link GetResult} for an existing document by loading its stored
 * fields (and optionally source) from Lucene, or by re-parsing the original
 * source when the read is served from the translog.
 *
 * @param id                 document id being fetched
 * @param storedFields       stored field names requested, or null for none
 * @param fetchSourceContext controls whether/how _source is returned
 * @param get                engine-level get result; must report exists()==true
 * @param mapperService      mapper service for field lookup and post-processing
 * @return the assembled get result with source, document and metadata fields
 * @throws IllegalArgumentException if a requested stored field is an object field
 */
private GetResult innerGetLoadFromStoredFields(String id, String[] storedFields, FetchSourceContext fetchSourceContext, Engine.GetResult get, MapperService mapperService) {
    assert get.exists() : "method should only be called if document could be retrieved";
    // check first if stored fields to be loaded don't contain an object field
    DocumentMapper docMapper = mapperService.documentMapper();
    if (storedFields != null) {
        for (String field : storedFields) {
            Mapper fieldMapper = docMapper.mappers().getMapper(field);
            if (fieldMapper == null) {
                if (docMapper.objectMappers().get(field) != null) {
                    // Only fail if we know it is a object field, missing paths / fields shouldn't fail.
                    throw new IllegalArgumentException("field [" + field + "] isn't a leaf field");
                }
            }
        }
    }
    Map<String, DocumentField> documentFields = null;
    Map<String, DocumentField> metadataFields = null;
    BytesReference source = null;
    DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
    // force fetching source if we read from translog and need to recreate stored fields
    boolean forceSourceForComputingTranslogStoredFields = get.isFromTranslog() && storedFields != null && Stream.of(storedFields).anyMatch(f -> TranslogLeafReader.ALL_FIELD_NAMES.contains(f) == false);
    FieldsVisitor fieldVisitor = buildFieldsVisitors(storedFields, forceSourceForComputingTranslogStoredFields ? FetchSourceContext.FETCH_SOURCE : fetchSourceContext);
    if (fieldVisitor != null) {
        try {
            docIdAndVersion.reader.document(docIdAndVersion.docId, fieldVisitor);
        } catch (IOException e) {
            throw new OpenSearchException("Failed to get id [" + id + "]", e);
        }
        source = fieldVisitor.source();
        // in case we read from translog, some extra steps are needed to make _source consistent and to load stored fields
        if (get.isFromTranslog()) {
            // just make source consistent by reapplying source filters from mapping (possibly also nulling the source)
            if (forceSourceForComputingTranslogStoredFields == false) {
                try {
                    source = indexShard.mapperService().documentMapper().sourceMapper().applyFilters(source, null);
                } catch (IOException e) {
                    throw new OpenSearchException("Failed to reapply filters for [" + id + "] after reading from translog", e);
                }
            } else {
                // Slow path: recreate stored fields from original source
                assert source != null : "original source in translog must exist";
                SourceToParse sourceToParse = new SourceToParse(shardId.getIndexName(), MapperService.SINGLE_MAPPING_NAME, id, source, XContentHelper.xContentType(source), fieldVisitor.routing());
                ParsedDocument doc = indexShard.mapperService().documentMapper().parse(sourceToParse);
                assert doc.dynamicMappingsUpdate() == null : "mapping updates should not be required on already-indexed doc";
                // update special fields
                doc.updateSeqID(docIdAndVersion.seqNo, docIdAndVersion.primaryTerm);
                doc.version().setLongValue(docIdAndVersion.version);
                // retrieve stored fields from parsed doc
                fieldVisitor = buildFieldsVisitors(storedFields, fetchSourceContext);
                for (IndexableField indexableField : doc.rootDoc().getFields()) {
                    IndexableFieldType fieldType = indexableField.fieldType();
                    if (fieldType.stored()) {
                        // Synthetic FieldInfo: only the name matters to needsField(); all other
                        // attributes are placeholder values.
                        FieldInfo fieldInfo = new FieldInfo(indexableField.name(), 0, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
                        StoredFieldVisitor.Status status = fieldVisitor.needsField(fieldInfo);
                        if (status == StoredFieldVisitor.Status.YES) {
                            if (indexableField.numericValue() != null) {
                                fieldVisitor.objectField(fieldInfo, indexableField.numericValue());
                            } else if (indexableField.binaryValue() != null) {
                                fieldVisitor.binaryField(fieldInfo, indexableField.binaryValue());
                            } else if (indexableField.stringValue() != null) {
                                fieldVisitor.objectField(fieldInfo, indexableField.stringValue());
                            }
                        } else if (status == StoredFieldVisitor.Status.STOP) {
                            break;
                        }
                    }
                }
                // retrieve source (with possible transformations, e.g. source filters)
                source = fieldVisitor.source();
            }
        }
        // put stored fields into result objects
        if (!fieldVisitor.fields().isEmpty()) {
            fieldVisitor.postProcess(mapperService);
            documentFields = new HashMap<>();
            metadataFields = new HashMap<>();
            for (Map.Entry<String, List<Object>> entry : fieldVisitor.fields().entrySet()) {
                if (mapperService.isMetadataField(entry.getKey())) {
                    metadataFields.put(entry.getKey(), new DocumentField(entry.getKey(), entry.getValue()));
                } else {
                    documentFields.put(entry.getKey(), new DocumentField(entry.getKey(), entry.getValue()));
                }
            }
        }
    }
    if (source != null) {
        // apply request-level source filtering
        if (fetchSourceContext.fetchSource() == false) {
            source = null;
        } else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
            Map<String, Object> sourceAsMap;
            // TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
            // Do we care?
            Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
            XContentType sourceContentType = typeMapTuple.v1();
            sourceAsMap = typeMapTuple.v2();
            sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
            try {
                source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
            } catch (IOException e) {
                throw new OpenSearchException("Failed to get id [" + id + "] with includes/excludes set", e);
            }
        }
    }
    // NOTE(review): the three passes below appear to duplicate work already done
    // above — fetchSource() nulling (done above), sourceMapper filter reapplication
    // (done in the translog branch above), and includes/excludes filtering (done
    // above). They look like a merge artifact; confirm against upstream history
    // before removing, as behavior with both passes active is order-dependent.
    if (!fetchSourceContext.fetchSource()) {
        source = null;
    }
    if (source != null && get.isFromTranslog()) {
        // reapply source filters from mapping (possibly also nulling the source)
        try {
            source = docMapper.sourceMapper().applyFilters(source, null);
        } catch (IOException e) {
            throw new OpenSearchException("Failed to reapply filters for [" + id + "] after reading from translog", e);
        }
    }
    if (source != null && (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0)) {
        Map<String, Object> sourceAsMap;
        // TODO: The source might parsed and available in the sourceLookup but that one uses unordered maps so different. Do we care?
        Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
        XContentType sourceContentType = typeMapTuple.v1();
        sourceAsMap = typeMapTuple.v2();
        sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
        try {
            source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
        } catch (IOException e) {
            throw new OpenSearchException("Failed to get id [" + id + "] with includes/excludes set", e);
        }
    }
    return new GetResult(shardId.getIndexName(), id, get.docIdAndVersion().seqNo, get.docIdAndVersion().primaryTerm, get.version(), get.exists(), source, documentFields, metadataFields);
}
Also used : CustomFieldsVisitor(org.opensearch.index.fieldvisitor.CustomFieldsVisitor) IdFieldMapper(org.opensearch.index.mapper.IdFieldMapper) BytesReference(org.opensearch.common.bytes.BytesReference) IndexableField(org.apache.lucene.index.IndexableField) Term(org.apache.lucene.index.Term) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) HashMap(java.util.HashMap) MeanMetric(org.opensearch.common.metrics.MeanMetric) OpenSearchException(org.opensearch.OpenSearchException) SourceToParse(org.opensearch.index.mapper.SourceToParse) IndexShard(org.opensearch.index.shard.IndexShard) UNASSIGNED_SEQ_NO(org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO) MapperService(org.opensearch.index.mapper.MapperService) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) Map(java.util.Map) XContentFactory(org.opensearch.common.xcontent.XContentFactory) ParsedDocument(org.opensearch.index.mapper.ParsedDocument) FieldsVisitor(org.opensearch.index.fieldvisitor.FieldsVisitor) Versions(org.opensearch.common.lucene.uid.Versions) AbstractIndexShardComponent(org.opensearch.index.shard.AbstractIndexShardComponent) IOException(java.io.IOException) DocumentField(org.opensearch.common.document.DocumentField) Mapper(org.opensearch.index.mapper.Mapper) UNASSIGNED_PRIMARY_TERM(org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM) Nullable(org.opensearch.common.Nullable) SourceFieldMapper(org.opensearch.index.mapper.SourceFieldMapper) Tuple(org.opensearch.common.collect.Tuple) DocumentMapper(org.opensearch.index.mapper.DocumentMapper) XContentHelper(org.opensearch.common.xcontent.XContentHelper) Engine(org.opensearch.index.engine.Engine) TimeUnit(java.util.concurrent.TimeUnit) Sets(org.opensearch.common.util.set.Sets) VersionType(org.opensearch.index.VersionType) List(java.util.List) TranslogLeafReader(org.opensearch.index.engine.TranslogLeafReader) Stream(java.util.stream.Stream) DocValuesType(org.apache.lucene.index.DocValuesType) 
DocIdAndVersion(org.opensearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion) IndexSettings(org.opensearch.index.IndexSettings) FieldInfo(org.apache.lucene.index.FieldInfo) CounterMetric(org.opensearch.common.metrics.CounterMetric) XContentType(org.opensearch.common.xcontent.XContentType) XContentMapValues(org.opensearch.common.xcontent.support.XContentMapValues) IndexOptions(org.apache.lucene.index.IndexOptions) RoutingFieldMapper(org.opensearch.index.mapper.RoutingFieldMapper) Collections(java.util.Collections) Uid(org.opensearch.index.mapper.Uid) FetchSourceContext(org.opensearch.search.fetch.subphase.FetchSourceContext) CustomFieldsVisitor(org.opensearch.index.fieldvisitor.CustomFieldsVisitor) FieldsVisitor(org.opensearch.index.fieldvisitor.FieldsVisitor) DocumentField(org.opensearch.common.document.DocumentField) IdFieldMapper(org.opensearch.index.mapper.IdFieldMapper) Mapper(org.opensearch.index.mapper.Mapper) SourceFieldMapper(org.opensearch.index.mapper.SourceFieldMapper) DocumentMapper(org.opensearch.index.mapper.DocumentMapper) RoutingFieldMapper(org.opensearch.index.mapper.RoutingFieldMapper) XContentType(org.opensearch.common.xcontent.XContentType) ParsedDocument(org.opensearch.index.mapper.ParsedDocument) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) List(java.util.List) BytesReference(org.opensearch.common.bytes.BytesReference) DocumentMapper(org.opensearch.index.mapper.DocumentMapper) SourceToParse(org.opensearch.index.mapper.SourceToParse) IOException(java.io.IOException) IndexableField(org.apache.lucene.index.IndexableField) DocIdAndVersion(org.opensearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion) IndexableFieldType(org.apache.lucene.index.IndexableFieldType) OpenSearchException(org.opensearch.OpenSearchException) HashMap(java.util.HashMap) Map(java.util.Map) FieldInfo(org.apache.lucene.index.FieldInfo) Tuple(org.opensearch.common.collect.Tuple)

Example 100 with ParsedDocument

use of org.opensearch.index.mapper.ParsedDocument in project OpenSearch by opensearch-project.

From the class IndexShard, the method prepareIndex.

/**
 * Parses the given source and wraps it into an {@code Engine.Index} operation.
 *
 * <p>Any dynamic-mapping update produced while parsing is attached to the
 * parsed document. The operation's start time is captured before parsing.
 *
 * @return the prepared index operation, keyed by the document's uid term
 */
public static Engine.Index prepareIndex(DocumentMapperForType docMapper, SourceToParse source, long seqNo, long primaryTerm, long version, VersionType versionType, Engine.Operation.Origin origin, long autoGeneratedIdTimestamp, boolean isRetry, long ifSeqNo, long ifPrimaryTerm) {
    final long startTime = System.nanoTime();
    final ParsedDocument parsedDoc = docMapper.getDocumentMapper().parse(source);
    if (docMapper.getMapping() != null) {
        parsedDoc.addDynamicMappingsUpdate(docMapper.getMapping());
    }
    final Term uidTerm = new Term(IdFieldMapper.NAME, Uid.encodeId(parsedDoc.id()));
    return new Engine.Index(uidTerm, parsedDoc, seqNo, primaryTerm, version, versionType, origin, startTime, autoGeneratedIdTimestamp, isRetry, ifSeqNo, ifPrimaryTerm);
}
Also used : ParsedDocument(org.opensearch.index.mapper.ParsedDocument) Index(org.opensearch.index.Index) CheckIndex(org.apache.lucene.index.CheckIndex) Term(org.apache.lucene.index.Term)

Aggregations

ParsedDocument (org.opensearch.index.mapper.ParsedDocument)145 Matchers.containsString (org.hamcrest.Matchers.containsString)66 LongPoint (org.apache.lucene.document.LongPoint)43 IOException (java.io.IOException)41 IndexableField (org.apache.lucene.index.IndexableField)41 BytesArray (org.opensearch.common.bytes.BytesArray)41 Store (org.opensearch.index.store.Store)38 BytesRef (org.apache.lucene.util.BytesRef)35 MapperService (org.opensearch.index.mapper.MapperService)32 IndexSettings (org.opensearch.index.IndexSettings)31 AtomicLong (java.util.concurrent.atomic.AtomicLong)30 SourceToParse (org.opensearch.index.mapper.SourceToParse)30 HashSet (java.util.HashSet)29 DocumentMapper (org.opensearch.index.mapper.DocumentMapper)29 ArrayList (java.util.ArrayList)25 Map (java.util.Map)24 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)24 UncheckedIOException (java.io.UncheckedIOException)23 Arrays (java.util.Arrays)23 Collections (java.util.Collections)23