Search in sources:

Example 1 with AnalyzeResponse

use of org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse in project elasticsearch by elastic.

the class TransportAnalyzeActionTests method testWithIndexAnalyzers.

/**
 * Drives {@code TransportAnalyzeAction.analyze} with the test's {@code indexAnalyzers}
 * through several analyzer / tokenizer configurations on a single reused request and
 * checks the resulting token counts and terms.
 *
 * @throws IOException declared by the test; propagated from the analyze calls
 */
public void testWithIndexAnalyzers() throws IOException {
    AnalyzeRequest analyzeRequest = new AnalyzeRequest();

    // NOTE(review): this "standard" setup is overwritten by the two calls below before
    // any analyze call is made — possibly a missing analyze/assert step; confirm intent.
    analyzeRequest.analyzer("standard");
    analyzeRequest.text("the quick brown fox");

    // Named analyzer "custom_analyzer".
    analyzeRequest.analyzer("custom_analyzer");
    analyzeRequest.text("the qu1ck brown fox");
    AnalyzeResponse response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    List<AnalyzeResponse.AnalyzeToken> found = response.getTokens();
    assertEquals(4, found.size());

    // "whitespace" analyzer on hyphenated input.
    analyzeRequest.analyzer("whitespace");
    analyzeRequest.text("the qu1ck brown fox-dog");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    found = response.getTokens();
    assertEquals(4, found.size());

    // Same input through "custom_analyzer": one more token is expected.
    analyzeRequest.analyzer("custom_analyzer");
    analyzeRequest.text("the qu1ck brown fox-dog");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    found = response.getTokens();
    assertEquals(5, found.size());

    // No named analyzer: whitespace tokenizer plus lowercase and wordDelimiter filters.
    analyzeRequest.analyzer(null);
    analyzeRequest.tokenizer("whitespace");
    analyzeRequest.addTokenFilter("lowercase");
    analyzeRequest.addTokenFilter("wordDelimiter");
    analyzeRequest.text("the qu1ck brown fox-dog");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    found = response.getTokens();
    assertEquals(5, found.size());
    String[] delimitedTerms = { "the", "qu1ck", "brown", "fox", "dog" };
    for (int i = 0; i < delimitedTerms.length; i++) {
        assertEquals(delimitedTerms[i], found.get(i).getTerm());
    }

    // "trigram" tokenizer with an additional "synonym" filter.
    // NOTE(review): the filters added above are not cleared in this method, so the
    // filter list presumably accumulates on the reused request — confirm.
    analyzeRequest.analyzer(null);
    analyzeRequest.tokenizer("trigram");
    analyzeRequest.addTokenFilter("synonym");
    analyzeRequest.text("kimchy");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    found = response.getTokens();
    assertEquals(2, found.size());
    String[] trigramTerms = { "sha", "hay" };
    for (int i = 0; i < trigramTerms.length; i++) {
        assertEquals(trigramTerms[i], found.get(i).getTerm());
    }
}
Also used : AnalyzeRequest(org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Example 2 with AnalyzeResponse

use of org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse in project elasticsearch by elastic.

the class TransportAnalyzeActionTests method testNoIndexAnalyzers.

/**
 * Drives {@code TransportAnalyzeAction.analyze} first with {@code null} index analyzers
 * and then with index analyzers chosen at random between {@code indexAnalyzers} and
 * {@code null}, checking the token counts and terms for each configuration.
 *
 * @throws IOException declared by the test; propagated from the analyze calls
 */
public void testNoIndexAnalyzers() throws IOException {
    AnalyzeRequest analyzeRequest = new AnalyzeRequest();

    // Named "standard" analyzer, no index analyzers supplied.
    analyzeRequest.analyzer("standard");
    analyzeRequest.text("the quick brown fox");
    AnalyzeResponse response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, null, registry, environment);
    List<AnalyzeResponse.AnalyzeToken> found = response.getTokens();
    assertEquals(4, found.size());

    // Whitespace tokenizer with lowercase and word_delimiter filters.
    analyzeRequest.analyzer(null);
    analyzeRequest.tokenizer("whitespace");
    analyzeRequest.addTokenFilter("lowercase");
    analyzeRequest.addTokenFilter("word_delimiter");
    analyzeRequest.text("the qu1ck brown fox");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
    found = response.getTokens();
    assertEquals(6, found.size());
    // Only the three pieces of "qu1ck" (token indices 1..3) are checked here.
    String[] splitTerms = { "qu", "1", "ck" };
    for (int i = 0; i < splitTerms.length; i++) {
        assertEquals(splitTerms[i], found.get(i + 1).getTerm());
    }

    // Same tokenizer, now with an html_strip char filter and HTML-wrapped input.
    analyzeRequest.analyzer(null);
    analyzeRequest.tokenizer("whitespace");
    analyzeRequest.addCharFilter("html_strip");
    analyzeRequest.addTokenFilter("lowercase");
    analyzeRequest.addTokenFilter("word_delimiter");
    analyzeRequest.text("<p>the qu1ck brown fox</p>");
    response = TransportAnalyzeAction.analyze(analyzeRequest, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
    found = response.getTokens();
    assertEquals(6, found.size());
    String[] allTerms = { "the", "qu", "1", "ck", "brown", "fox" };
    for (int i = 0; i < allTerms.length; i++) {
        assertEquals(allTerms[i], found.get(i).getTerm());
    }
}
Also used : AnalyzeRequest(org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Example 3 with AnalyzeResponse

use of org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse in project elasticsearch by elastic.

the class AnalyzeActionIT method testCustomTokenFilterInRequest.

/**
 * Sends an analyze request with explain enabled that uses the whitespace tokenizer,
 * the lowercase filter and an inline "stop" filter definition, then checks the
 * per-stage detail (tokenizer output and each token filter's output).
 *
 * @throws Exception declared by the test
 */
public void testCustomTokenFilterInRequest() throws Exception {
    // Inline stop filter definition passed directly in the request.
    Map<String, Object> stopFilter = new HashMap<>();
    stopFilter.put("type", "stop");
    stopFilter.put("stopwords", new String[] { "foo", "buzz" });

    AnalyzeResponse response = client().admin().indices().prepareAnalyze()
        .setText("Foo buzz test")
        .setTokenizer("whitespace")
        .addTokenFilter("lowercase")
        .addTokenFilter(stopFilter)
        .setExplain(true)
        .get();

    // Offsets shared by the tokenizer stage and the lowercase stage.
    int[] startOffsets = { 0, 4, 9 };
    int[] endOffsets = { 3, 8, 13 };

    // tokenizer
    String[] rawTerms = { "Foo", "buzz", "test" };
    assertThat(response.detail().tokenizer().getName(), equalTo("whitespace"));
    assertThat(response.detail().tokenizer().getTokens().length, equalTo(3));
    for (int i = 0; i < rawTerms.length; i++) {
        assertThat(response.detail().tokenizer().getTokens()[i].getTerm(), equalTo(rawTerms[i]));
        assertThat(response.detail().tokenizer().getTokens()[i].getStartOffset(), equalTo(startOffsets[i]));
        assertThat(response.detail().tokenizer().getTokens()[i].getEndOffset(), equalTo(endOffsets[i]));
        assertThat(response.detail().tokenizer().getTokens()[i].getPosition(), equalTo(i));
        assertThat(response.detail().tokenizer().getTokens()[i].getPositionLength(), equalTo(1));
    }

    // tokenfilter(lowercase)
    String[] loweredTerms = { "foo", "buzz", "test" };
    assertThat(response.detail().tokenfilters().length, equalTo(2));
    assertThat(response.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
    assertThat(response.detail().tokenfilters()[0].getTokens().length, equalTo(3));
    for (int i = 0; i < loweredTerms.length; i++) {
        assertThat(response.detail().tokenfilters()[0].getTokens()[i].getTerm(), equalTo(loweredTerms[i]));
        assertThat(response.detail().tokenfilters()[0].getTokens()[i].getStartOffset(), equalTo(startOffsets[i]));
        assertThat(response.detail().tokenfilters()[0].getTokens()[i].getEndOffset(), equalTo(endOffsets[i]));
        assertThat(response.detail().tokenfilters()[0].getTokens()[i].getPosition(), equalTo(i));
        assertThat(response.detail().tokenfilters()[0].getTokens()[i].getPositionLength(), equalTo(1));
    }

    // tokenfilter({"type": "stop", "stopwords": ["foo", "buzz"]})
    // Only "test" is expected to remain, and it keeps its original offsets and position.
    assertThat(response.detail().tokenfilters()[1].getName(), equalTo("_anonymous_tokenfilter_[1]"));
    assertThat(response.detail().tokenfilters()[1].getTokens().length, equalTo(1));
    assertThat(response.detail().tokenfilters()[1].getTokens()[0].getTerm(), equalTo("test"));
    assertThat(response.detail().tokenfilters()[1].getTokens()[0].getStartOffset(), equalTo(9));
    assertThat(response.detail().tokenfilters()[1].getTokens()[0].getEndOffset(), equalTo(13));
    assertThat(response.detail().tokenfilters()[1].getTokens()[0].getPosition(), equalTo(2));
    assertThat(response.detail().tokenfilters()[1].getTokens()[0].getPositionLength(), equalTo(1));
}
Also used : HashMap(java.util.HashMap) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Example 4 with AnalyzeResponse

use of org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse in project elasticsearch by elastic.

the class AnalyzeActionIT method testAnalyzerWithMultiValues.

/**
 * Analyzes two text values against the "simple" field of index "test" (mapped with
 * the simple analyzer and position_increment_gap=100) and checks the combined token
 * list, including one token from each value.
 *
 * @throws Exception declared by the test
 */
public void testAnalyzerWithMultiValues() throws Exception {
    // Index "test" with alias "alias", then a text field using the simple analyzer.
    assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
    ensureGreen();
    client().admin().indices().preparePutMapping("test")
        .setType("document")
        .setSource("simple", "type=text,analyzer=simple,position_increment_gap=100")
        .get();

    String[] values = new String[] { "THIS IS A TEST", "THE SECOND TEXT" };
    final AnalyzeRequestBuilder analyzeBuilder = client().admin().indices().prepareAnalyze();
    analyzeBuilder.setText(values);
    analyzeBuilder.setIndex(indexOrAlias());
    analyzeBuilder.setField("simple");
    AnalyzeResponse response = analyzeBuilder.get();

    // 4 tokens from the first value + 3 from the second.
    assertThat(response.getTokens().size(), equalTo(7));

    // Last token of the first value.
    AnalyzeResponse.AnalyzeToken current = response.getTokens().get(3);
    assertThat(current.getTerm(), equalTo("test"));
    assertThat(current.getPosition(), equalTo(3));
    assertThat(current.getStartOffset(), equalTo(10));
    assertThat(current.getEndOffset(), equalTo(14));
    assertThat(current.getPositionLength(), equalTo(1));

    // Second token of the second value.
    // NOTE(review): position 105 presumably reflects the position_increment_gap of 100
    // configured in the mapping above — confirm.
    current = response.getTokens().get(5);
    assertThat(current.getTerm(), equalTo("second"));
    assertThat(current.getPosition(), equalTo(105));
    assertThat(current.getStartOffset(), equalTo(19));
    assertThat(current.getEndOffset(), equalTo(25));
    assertThat(current.getPositionLength(), equalTo(1));
}
Also used : Alias(org.elasticsearch.action.admin.indices.alias.Alias) AnalyzeRequestBuilder(org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Example 5 with AnalyzeResponse

use of org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse in project elasticsearch by elastic.

the class AnalyzeActionIT method testAnalyzeWithNonDefaultPostionLength.

/**
 * Creates index "test" with a synonym filter ("wtf, what the fudge") wired into the
 * "custom_syns" analyzer, analyzes "say what the fudge" with it, and checks every
 * returned token, including the synonym token whose position length is 3.
 *
 * @throws Exception declared by the test
 */
public void testAnalyzeWithNonDefaultPostionLength() throws Exception {
    assertAcked(prepareCreate("test")
        .addAlias(new Alias("alias"))
        .setSettings(Settings.builder()
            .put(indexSettings())
            .put("index.analysis.filter.syns.type", "synonym")
            .putArray("index.analysis.filter.syns.synonyms", "wtf, what the fudge")
            .put("index.analysis.analyzer.custom_syns.tokenizer", "standard")
            .putArray("index.analysis.analyzer.custom_syns.filter", "lowercase", "syns")));
    ensureGreen();

    AnalyzeResponse response = client().admin().indices()
        .prepareAnalyze("say what the fudge")
        .setIndex("test")
        .setAnalyzer("custom_syns")
        .get();
    assertThat(response.getTokens().size(), equalTo(5));

    // Expected attributes per token, in response order. "wtf" (index 2) shares the
    // position and start offset of "what" and has position length 3.
    String[] terms = { "say", "what", "wtf", "the", "fudge" };
    int[] positions = { 0, 1, 1, 2, 3 };
    int[] startOffsets = { 0, 4, 4, 9, 13 };
    int[] endOffsets = { 3, 8, 18, 12, 18 };
    int[] positionLengths = { 1, 1, 3, 1, 1 };

    for (int i = 0; i < terms.length; i++) {
        AnalyzeResponse.AnalyzeToken current = response.getTokens().get(i);
        assertThat(current.getTerm(), equalTo(terms[i]));
        assertThat(current.getPosition(), equalTo(positions[i]));
        assertThat(current.getStartOffset(), equalTo(startOffsets[i]));
        assertThat(current.getEndOffset(), equalTo(endOffsets[i]));
        assertThat(current.getPositionLength(), equalTo(positionLengths[i]));
    }
}
Also used : Alias(org.elasticsearch.action.admin.indices.alias.Alias) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Aggregations

AnalyzeResponse (org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse): 25, Alias (org.elasticsearch.action.admin.indices.alias.Alias): 10, AnalyzeRequest (org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest): 4, HashMap (java.util.HashMap): 3, AnalyzeRequestBuilder (org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder): 2, AnalyzeToken (org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken): 2, SearchException (core.framework.search.SearchException): 1, StopWatch (core.framework.util.StopWatch): 1, ArrayList (java.util.ArrayList): 1, ElasticsearchException (org.elasticsearch.ElasticsearchException): 1