Search in sources:

Example 46 with BytesRef

Use of org.apache.lucene.util.BytesRef in the elasticsearch project by elastic.

In class Request, method bulk.

/**
 * Builds the low-level {@code POST /_bulk} request for the given bulk request.
 *
 * <p>The body is assembled action by action: a metadata object describing the
 * action, a stream separator, and, for index, create and update actions, the
 * document or update payload followed by another separator. Actions of any
 * other type contribute their metadata line only.
 *
 * @param bulkRequest the bulk request to convert
 * @return a request targeting {@code /_bulk} with the serialized actions as its entity
 * @throws IOException if writing the metadata or re-encoding a source fails
 */
static Request bulk(BulkRequest bulkRequest) throws IOException {
    Params parameters = Params.builder();
    parameters.withTimeout(bulkRequest.timeout());
    parameters.withRefreshPolicy(bulkRequest.getRefreshPolicy());

    // The Bulk API only supports newline delimited JSON or Smile. Scan every
    // action up front so that a single content type is agreed on (the check
    // itself is delegated to enforceSameContentType) before anything is written.
    XContentType bulkContentType = null;
    for (int position = 0; position < bulkRequest.numberOfActions(); position++) {
        DocWriteRequest<?> action = bulkRequest.requests().get(position);
        DocWriteRequest.OpType actionType = action.opType();
        boolean writesDocument = actionType == DocWriteRequest.OpType.INDEX || actionType == DocWriteRequest.OpType.CREATE;
        if (writesDocument) {
            bulkContentType = enforceSameContentType((IndexRequest) action, bulkContentType);
            continue;
        }
        if (actionType != DocWriteRequest.OpType.UPDATE) {
            continue;
        }
        UpdateRequest update = (UpdateRequest) action;
        if (update.doc() != null) {
            bulkContentType = enforceSameContentType(update.doc(), bulkContentType);
        }
        if (update.upsertRequest() != null) {
            bulkContentType = enforceSameContentType(update.upsertRequest(), bulkContentType);
        }
    }
    // No action carried a content type: fall back to JSON.
    if (bulkContentType == null) {
        bulkContentType = XContentType.JSON;
    }

    byte separator = bulkContentType.xContent().streamSeparator();
    ContentType requestContentType = ContentType.create(bulkContentType.mediaType());
    ByteArrayOutputStream payload = new ByteArrayOutputStream();

    for (DocWriteRequest<?> action : bulkRequest.requests()) {
        DocWriteRequest.OpType actionType = action.opType();
        boolean writesDocument = actionType == DocWriteRequest.OpType.INDEX || actionType == DocWriteRequest.OpType.CREATE;
        boolean isUpdate = actionType == DocWriteRequest.OpType.UPDATE;

        // Action metadata line: { "<op>": { ...optional fields... } }
        try (XContentBuilder header = XContentBuilder.builder(bulkContentType.xContent())) {
            header.startObject();
            header.startObject(actionType.getLowercase());
            if (Strings.hasLength(action.index())) {
                header.field("_index", action.index());
            }
            if (Strings.hasLength(action.type())) {
                header.field("_type", action.type());
            }
            if (Strings.hasLength(action.id())) {
                header.field("_id", action.id());
            }
            if (Strings.hasLength(action.routing())) {
                header.field("_routing", action.routing());
            }
            if (Strings.hasLength(action.parent())) {
                header.field("_parent", action.parent());
            }
            if (action.version() != Versions.MATCH_ANY) {
                header.field("_version", action.version());
            }

            // Only the non-internal version types listed here are written out.
            VersionType versionType = action.versionType();
            String versionTypeName = null;
            if (versionType == VersionType.EXTERNAL) {
                versionTypeName = "external";
            } else if (versionType == VersionType.EXTERNAL_GTE) {
                versionTypeName = "external_gte";
            } else if (versionType == VersionType.FORCE) {
                versionTypeName = "force";
            }
            if (versionTypeName != null) {
                header.field("_version_type", versionTypeName);
            }

            if (writesDocument) {
                IndexRequest indexAction = (IndexRequest) action;
                if (Strings.hasLength(indexAction.getPipeline())) {
                    header.field("pipeline", indexAction.getPipeline());
                }
            } else if (isUpdate) {
                UpdateRequest updateAction = (UpdateRequest) action;
                if (updateAction.retryOnConflict() > 0) {
                    header.field("_retry_on_conflict", updateAction.retryOnConflict());
                }
                if (updateAction.fetchSource() != null) {
                    header.field("_source", updateAction.fetchSource());
                }
            }
            header.endObject();
            header.endObject();

            BytesRef headerBytes = header.bytes().toBytesRef();
            payload.write(headerBytes.bytes, headerBytes.offset, headerBytes.length);
            payload.write(separator);
        }

        // Optional payload line: the document (re-encoded into the bulk content
        // type) for index/create, or the serialized update request for update.
        BytesRef body = null;
        if (writesDocument) {
            IndexRequest indexAction = (IndexRequest) action;
            BytesReference rawSource = indexAction.source();
            XContentType rawContentType = indexAction.getContentType();
            try (XContentParser parser = XContentHelper.createParser(NamedXContentRegistry.EMPTY, rawSource, rawContentType);
                 XContentBuilder converted = XContentBuilder.builder(bulkContentType.xContent())) {
                converted.copyCurrentStructure(parser);
                body = converted.bytes().toBytesRef();
            }
        } else if (isUpdate) {
            body = XContentHelper.toXContent((UpdateRequest) action, bulkContentType, false).toBytesRef();
        }
        if (body != null) {
            payload.write(body.bytes, body.offset, body.length);
            payload.write(separator);
        }
    }

    byte[] serialized = payload.toByteArray();
    HttpEntity entity = new ByteArrayEntity(serialized, 0, serialized.length, requestContentType);
    return new Request(HttpPost.METHOD_NAME, "/_bulk", parameters.getParams(), entity);
}
Also used : BytesReference(org.elasticsearch.common.bytes.BytesReference) XContentType(org.elasticsearch.common.xcontent.XContentType) ContentType(org.apache.http.entity.ContentType) HttpEntity(org.apache.http.HttpEntity) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) WriteRequest(org.elasticsearch.action.support.WriteRequest) IndexRequest(org.elasticsearch.action.index.IndexRequest) GetRequest(org.elasticsearch.action.get.GetRequest) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IndexRequest(org.elasticsearch.action.index.IndexRequest) VersionType(org.elasticsearch.index.VersionType) XContentType(org.elasticsearch.common.xcontent.XContentType) ByteArrayEntity(org.apache.http.entity.ByteArrayEntity) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) BytesRef(org.apache.lucene.util.BytesRef) XContentParser(org.elasticsearch.common.xcontent.XContentParser)

Example 47 with BytesRef

Use of org.apache.lucene.util.BytesRef in the elasticsearch project by elastic.

In class Request, method index.

/**
 * Builds the low-level HTTP request for indexing a single document.
 *
 * <p>{@code PUT} is used when the request carries an id, {@code POST} otherwise.
 * A {@code _create} path segment is passed to the endpoint helper when the
 * operation type is {@code CREATE}.
 *
 * @param indexRequest the index request to convert
 * @return a request whose entity is the document source in its own content type
 */
static Request index(IndexRequest indexRequest) {
    String method;
    if (Strings.hasLength(indexRequest.id())) {
        method = HttpPut.METHOD_NAME;
    } else {
        method = HttpPost.METHOD_NAME;
    }

    String createSuffix = null;
    if (indexRequest.opType() == DocWriteRequest.OpType.CREATE) {
        createSuffix = "_create";
    }
    String endpoint = endpoint(indexRequest.index(), indexRequest.type(), indexRequest.id(), createSuffix);

    Params parameters = Params.builder();
    parameters.withRouting(indexRequest.routing());
    parameters.withParent(indexRequest.parent());
    parameters.withTimeout(indexRequest.timeout());
    parameters.withVersion(indexRequest.version());
    parameters.withVersionType(indexRequest.versionType());
    parameters.withPipeline(indexRequest.getPipeline());
    parameters.withRefreshPolicy(indexRequest.getRefreshPolicy());
    parameters.withWaitForActiveShards(indexRequest.waitForActiveShards());

    // The source bytes are sent as-is, labelled with the request's own content type.
    BytesRef document = indexRequest.source().toBytesRef();
    ContentType documentContentType = ContentType.create(indexRequest.getContentType().mediaType());
    HttpEntity entity = new ByteArrayEntity(document.bytes, document.offset, document.length, documentContentType);
    return new Request(method, endpoint, parameters.getParams(), entity);
}
Also used : XContentType(org.elasticsearch.common.xcontent.XContentType) ContentType(org.apache.http.entity.ContentType) HttpEntity(org.apache.http.HttpEntity) ByteArrayEntity(org.apache.http.entity.ByteArrayEntity) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) WriteRequest(org.elasticsearch.action.support.WriteRequest) IndexRequest(org.elasticsearch.action.index.IndexRequest) GetRequest(org.elasticsearch.action.get.GetRequest) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) BytesRef(org.apache.lucene.util.BytesRef)

Example 48 with BytesRef

Use of org.apache.lucene.util.BytesRef in the elasticsearch project by elastic.

In class Request, method update.

/**
 * Builds the low-level {@code POST .../_update} request for the given update request.
 *
 * <p>The partial document and the upsert document, when both are present, must
 * share one content type; that type is used to serialize the request body. When
 * neither is present, {@code Requests.INDEX_CONTENT_TYPE} is used.
 *
 * @param updateRequest the update request to convert
 * @return a request whose entity is the serialized update request
 * @throws IOException if serializing the update request fails
 * @throws IllegalStateException if doc and upsert declare different content types
 */
static Request update(UpdateRequest updateRequest) throws IOException {
    String endpoint = endpoint(updateRequest.index(), updateRequest.type(), updateRequest.id(), "_update");

    Params parameters = Params.builder();
    parameters.withRouting(updateRequest.routing());
    parameters.withParent(updateRequest.parent());
    parameters.withTimeout(updateRequest.timeout());
    parameters.withRefreshPolicy(updateRequest.getRefreshPolicy());
    parameters.withWaitForActiveShards(updateRequest.waitForActiveShards());
    parameters.withDocAsUpsert(updateRequest.docAsUpsert());
    parameters.withFetchSourceContext(updateRequest.fetchSource());
    parameters.withRetryOnConflict(updateRequest.retryOnConflict());
    parameters.withVersion(updateRequest.version());
    parameters.withVersionType(updateRequest.versionType());

    // The Java API tolerates a partial document and an upsert document with
    // different content types; this client insists on a single one for both.
    XContentType docContentType = updateRequest.doc() != null ? updateRequest.doc().getContentType() : null;
    XContentType bodyContentType = docContentType;
    if (updateRequest.upsertRequest() != null) {
        XContentType upsertContentType = updateRequest.upsertRequest().getContentType();
        boolean conflicting = docContentType != null && docContentType != upsertContentType;
        if (conflicting) {
            throw new IllegalStateException("Update request cannot have different content types for doc [" + docContentType + "]"
                + " and upsert [" + upsertContentType + "] documents");
        }
        bodyContentType = upsertContentType;
    }
    if (bodyContentType == null) {
        bodyContentType = Requests.INDEX_CONTENT_TYPE;
    }

    BytesRef body = XContentHelper.toXContent(updateRequest, bodyContentType, false).toBytesRef();
    ContentType entityContentType = ContentType.create(bodyContentType.mediaType());
    HttpEntity entity = new ByteArrayEntity(body.bytes, body.offset, body.length, entityContentType);
    return new Request(HttpPost.METHOD_NAME, endpoint, parameters.getParams(), entity);
}
Also used : XContentType(org.elasticsearch.common.xcontent.XContentType) HttpEntity(org.apache.http.HttpEntity) ByteArrayEntity(org.apache.http.entity.ByteArrayEntity) DeleteRequest(org.elasticsearch.action.delete.DeleteRequest) WriteRequest(org.elasticsearch.action.support.WriteRequest) IndexRequest(org.elasticsearch.action.index.IndexRequest) GetRequest(org.elasticsearch.action.get.GetRequest) UpdateRequest(org.elasticsearch.action.update.UpdateRequest) DocWriteRequest(org.elasticsearch.action.DocWriteRequest) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) BytesRef(org.apache.lucene.util.BytesRef)

Example 49 with BytesRef

Use of org.apache.lucene.util.BytesRef in the elasticsearch project by elastic.

In class CustomPassageFormatterTests, method testHtmlEncodeFormat.

/**
 * Formats two adjacent passages with an HTML-encoding formatter and checks that
 * markup in the content is escaped while the highlight tags are emitted as-is.
 */
public void testHtmlEncodeFormat() {
    final String text = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
    final String term = "highlighter";
    final BytesRef termBytes = new BytesRef(term);

    // First passage: from the beginning of the text up to the first occurrence.
    final int firstMatchStart = text.indexOf(term);
    final int firstMatchEnd = firstMatchStart + term.length();
    Passage first = new Passage();
    first.setStartOffset(0);
    // Let's include the whitespace at the end to make sure we trim it.
    first.setEndOffset(firstMatchEnd + 6);
    first.addMatch(firstMatchStart, firstMatchEnd, termBytes);

    // Second passage: everything after the first passage, holding the last occurrence.
    final int lastMatchStart = text.lastIndexOf(term);
    final int lastMatchEnd = lastMatchStart + term.length();
    Passage second = new Passage();
    second.setStartOffset(first.getEndOffset());
    second.setEndOffset(text.length());
    second.addMatch(lastMatchStart, lastMatchEnd, termBytes);

    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
    Snippet[] snippets = formatter.format(new Passage[] { first, second }, text);

    assertThat(snippets, notNullValue());
    assertThat(snippets.length, equalTo(2));
    assertThat(snippets[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
    assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
}
Also used : SimpleHTMLEncoder(org.apache.lucene.search.highlight.SimpleHTMLEncoder) Snippet(org.apache.lucene.search.highlight.Snippet) BytesRef(org.apache.lucene.util.BytesRef)

Example 50 with BytesRef

Use of org.apache.lucene.util.BytesRef in the elasticsearch project by elastic.

In class CustomPassageFormatterTests, method testSimpleFormat.

/**
 * Formats three consecutive passages (two with a match, one without) using the
 * default encoder and checks both the text and the highlighted flag of each snippet.
 */
public void testSimpleFormat() {
    final String text = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
    final String term = "highlighter";
    final BytesRef termBytes = new BytesRef(term);

    // First passage: start of the text through the first occurrence.
    final int firstMatchStart = text.indexOf(term);
    final int firstMatchEnd = firstMatchStart + term.length();
    Passage first = new Passage();
    first.setStartOffset(0);
    // Let's include the whitespace at the end to make sure we trim it.
    first.setEndOffset(firstMatchEnd + 2);
    first.addMatch(firstMatchStart, firstMatchEnd, termBytes);

    // Second passage: continues where the first one stopped, holds the last occurrence.
    final int lastMatchStart = text.lastIndexOf(term);
    final int lastMatchEnd = lastMatchStart + term.length();
    Passage second = new Passage();
    second.setStartOffset(first.getEndOffset());
    second.setEndOffset(lastMatchEnd + 26);
    second.addMatch(lastMatchStart, lastMatchEnd, termBytes);

    // Third passage: the remainder of the text, with no match registered.
    Passage third = new Passage();
    third.setStartOffset(second.getEndOffset());
    third.setEndOffset(text.length());

    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    Snippet[] snippets = formatter.format(new Passage[] { first, second, third }, text);

    assertThat(snippets, notNullValue());
    assertThat(snippets.length, equalTo(3));

    assertThat(snippets[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
    assertThat(snippets[0].isHighlighted(), equalTo(true));

    assertThat(snippets[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
    assertThat(snippets[1].isHighlighted(), equalTo(true));

    assertThat(snippets[2].getText(), equalTo("No matches here."));
    assertThat(snippets[2].isHighlighted(), equalTo(false));
}
Also used : DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) Snippet(org.apache.lucene.search.highlight.Snippet) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BytesRef (org.apache.lucene.util.BytesRef): 1449, Document (org.apache.lucene.document.Document): 410, Directory (org.apache.lucene.store.Directory): 370, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 266, ArrayList (java.util.ArrayList): 186, Test (org.junit.Test): 182, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 164, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 152, Term (org.apache.lucene.index.Term): 124, Analyzer (org.apache.lucene.analysis.Analyzer): 121, IndexReader (org.apache.lucene.index.IndexReader): 121, TermsEnum (org.apache.lucene.index.TermsEnum): 116, SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField): 110, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 105, IOException (java.io.IOException): 104, Field (org.apache.lucene.document.Field): 101, StringField (org.apache.lucene.document.StringField): 101, CrateUnitTest (io.crate.test.integration.CrateUnitTest): 95, TextField (org.apache.lucene.document.TextField): 95, BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 87