Example 6 with FieldAnalysisRequest

Use of org.apache.solr.client.solrj.request.FieldAnalysisRequest in the Apache lucene-solr project.

From the class FieldAnalysisRequestHandlerTest, method testSpatial:

@Test
public void testSpatial() throws Exception {
    FieldAnalysisRequest request = new FieldAnalysisRequest();
    request.addFieldType("location_rpt");
    request.setFieldValue("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))");
    NamedList<NamedList> result = handler.handleAnalysisRequest(request, h.getCore().getLatestSchema());
    NamedList<List<NamedList>> tokens = (NamedList<List<NamedList>>) ((NamedList) result.get("field_types").get("location_rpt")).get("index");
    List<NamedList> tokenList = tokens.get("org.apache.lucene.spatial.prefix.BytesRefIteratorTokenStream");
    List<String> vals = new ArrayList<>(tokenList.size());
    for (NamedList v : tokenList) {
        vals.add((String) v.get("text"));
    }
    Collections.sort(vals);
    assertEquals("[s, s7, s7w, s7w1+, s9, s9v, s9v2+, sp, spp, spp5+, sv, svk, svk6+]", vals.toString());
}
Also used : NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) List(java.util.List) FieldAnalysisRequest(org.apache.solr.client.solrj.request.FieldAnalysisRequest) Test(org.junit.Test)
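The test above calls the request handler in-process. As a rough sketch of exercising the same request type against a running Solr instance over HTTP with SolrJ: the base URL and core name are illustrative assumptions, and the FieldAnalysisResponse getter names (getFieldTypeAnalysis, getIndexPhases, getTokens) are written from memory and should be checked against the SolrJ version in use.

import java.io.IOException;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.client.solrj.response.AnalysisResponseBase;
import org.apache.solr.client.solrj.response.FieldAnalysisResponse;

public class FieldAnalysisClientSketch {
    public static void main(String[] args) throws SolrServerException, IOException {
        // Base URL and core name are assumptions for illustration; point this at a core
        // whose schema actually defines the "location_rpt" field type.
        try (HttpSolrClient client =
                 new HttpSolrClient.Builder("http://localhost:8983/solr/mycore").build()) {
            FieldAnalysisRequest request = new FieldAnalysisRequest();  // defaults to /analysis/field
            request.addFieldType("location_rpt");
            request.setFieldValue("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))");
            // process(...) sends the request and parses the response.
            FieldAnalysisResponse response = request.process(client);
            FieldAnalysisResponse.Analysis analysis = response.getFieldTypeAnalysis("location_rpt");
            for (AnalysisResponseBase.AnalysisPhase phase : analysis.getIndexPhases()) {
                System.out.println(phase.getClassName() + ": " + phase.getTokens().size() + " tokens");
            }
        }
    }
}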

Example 7 with FieldAnalysisRequest

Use of org.apache.solr.client.solrj.request.FieldAnalysisRequest in the Apache lucene-solr project.

From the class FieldAnalysisRequestHandlerTest, method testCustomAttribute:

//See SOLR-8460
@Test
public void testCustomAttribute() throws Exception {
    FieldAnalysisRequest request = new FieldAnalysisRequest();
    request.addFieldType("skutype1");
    request.setFieldValue("hi, 3456-12 a Test");
    request.setShowMatch(false);
    FieldType fieldType = new TextField();
    Analyzer analyzer = new TokenizerChain(new TokenizerFactory(Collections.emptyMap()) {

        @Override
        public Tokenizer create(AttributeFactory factory) {
            return new CustomTokenizer(factory);
        }
    }, new TokenFilterFactory[] { new TokenFilterFactory(Collections.emptyMap()) {

        @Override
        public TokenStream create(TokenStream input) {
            return new CustomTokenFilter(input);
        }
    } });
    fieldType.setIndexAnalyzer(analyzer);
    NamedList<NamedList> result = handler.analyzeValues(request, fieldType, "fieldNameUnused");
    // just test that we see "900" in the flags attribute here
    List<NamedList> tokenInfoList = (List<NamedList>) result.findRecursive("index", CustomTokenFilter.class.getName());
    // '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
    assertEquals(901, tokenInfoList.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) TokenizerFactory(org.apache.lucene.analysis.util.TokenizerFactory) NamedList(org.apache.solr.common.util.NamedList) AttributeFactory(org.apache.lucene.util.AttributeFactory) Analyzer(org.apache.lucene.analysis.Analyzer) TokenFilterFactory(org.apache.lucene.analysis.util.TokenFilterFactory) FieldType(org.apache.solr.schema.FieldType) TokenizerChain(org.apache.solr.analysis.TokenizerChain) TextField(org.apache.solr.schema.TextField) ArrayList(java.util.ArrayList) List(java.util.List) FieldAnalysisRequest(org.apache.solr.client.solrj.request.FieldAnalysisRequest) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) WhitespaceTokenizer(org.apache.lucene.analysis.core.WhitespaceTokenizer) Test(org.junit.Test)
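The CustomTokenizer and CustomTokenFilter helpers referenced above are inner classes of the test and are not reproduced here. As a minimal sketch of the kind of filter being wired in, a TokenFilter that ORs a bit into the FlagsAttribute (so a custom flag value surfaces in the analysis breakdown, as the 901 assertion relies on) might look like the following; the class name and the flag value are illustrative, not the test's actual helpers.

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;

// Illustrative flag-setting filter, in the spirit of the test's CustomTokenFilter.
final class FlagSettingTokenFilter extends TokenFilter {
    private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);

    FlagSettingTokenFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
            return false;
        }
        // OR in an extra bit so the flag shows up in the per-token analysis output.
        flagsAtt.setFlags(flagsAtt.getFlags() | 1);
        return true;
    }
}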

Example 8 with FieldAnalysisRequest

Use of org.apache.solr.client.solrj.request.FieldAnalysisRequest in the Apache lucene-solr project.

From the class FieldAnalysisRequestHandlerTest, method testPositionHistoryWithWDGF:

@Test
public void testPositionHistoryWithWDGF() throws Exception {
    FieldAnalysisRequest request = new FieldAnalysisRequest();
    request.addFieldType("skutype1");
    request.setFieldValue("hi, 3456-12 a Test");
    request.setShowMatch(false);
    NamedList<NamedList> result = handler.handleAnalysisRequest(request, h.getCore().getLatestSchema());
    assertTrue("result is null and it shouldn't be", result != null);
    NamedList<NamedList> fieldTypes = result.get("field_types");
    assertNotNull("field_types should never be null", fieldTypes);
    NamedList<NamedList> textType = fieldTypes.get("skutype1");
    assertNotNull("expecting result for field type 'skutype1'", textType);
    NamedList<List<NamedList>> indexPart = textType.get("index");
    assertNotNull("expecting an index token analysis for field type 'skutype1'", indexPart);
    List<NamedList> tokenList = indexPart.get(MockTokenizer.class.getName());
    assertNotNull("Expcting MockTokenizer analysis breakdown", tokenList);
    assertEquals(4, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("hi,", null, "word", 0, 3, 1, new int[] { 1 }, null, false));
    assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, new int[] { 2 }, null, false));
    assertToken(tokenList.get(2), new TokenInfo("a", null, "word", 12, 13, 3, new int[] { 3 }, null, false));
    assertToken(tokenList.get(3), new TokenInfo("Test", null, "word", 14, 18, 4, new int[] { 4 }, null, false));
    tokenList = indexPart.get("org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter");
    assertNotNull("Expcting WordDelimiterGraphFilter analysis breakdown", tokenList);
    assertEquals(6, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[] { 1, 1 }, null, false));
    assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[] { 2, 2 }, null, false));
    assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[] { 2, 2 }, null, false));
    assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[] { 2, 3 }, null, false));
    assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[] { 3, 4 }, null, false));
    assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[] { 4, 5 }, null, false));
    tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
    assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
    assertEquals(6, tokenList.size());
    assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[] { 1, 1, 1 }, null, false));
    assertToken(tokenList.get(1), new TokenInfo("345612", null, "word", 4, 11, 2, new int[] { 2, 2, 2 }, null, false));
    assertToken(tokenList.get(2), new TokenInfo("3456", null, "word", 4, 8, 2, new int[] { 2, 2, 2 }, null, false));
    assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[] { 2, 3, 3 }, null, false));
    assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[] { 3, 4, 4 }, null, false));
    assertToken(tokenList.get(5), new TokenInfo("test", null, "word", 14, 18, 5, new int[] { 4, 5, 5 }, null, false));
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) List(java.util.List) FieldAnalysisRequest(org.apache.solr.client.solrj.request.FieldAnalysisRequest) Test(org.junit.Test)
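The int[] argument to each assertToken(...) call is the token's position history: its position after each stage of the chain (MockTokenizer, then WordDelimiterGraphFilter, then LowerCaseFilter). A hedged sketch of pulling the same information out of the handler's NamedList output follows; the "text" key is used in the spatial example above, while the "positionHistory" key and the helper itself are assumptions about the response format rather than code from the test.

import java.util.List;

import org.apache.solr.common.util.NamedList;

public final class PositionHistoryPrinter {

    // Prints each token's text and its position history for one stage of the "index"
    // analysis breakdown (e.g. the WordDelimiterGraphFilter entry above).
    @SuppressWarnings("rawtypes")
    public static void printStage(NamedList<List<NamedList>> indexPart, String stageClassName) {
        List<NamedList> tokenList = indexPart.get(stageClassName);
        if (tokenList == null) {
            return;  // no breakdown recorded for this stage
        }
        for (NamedList token : tokenList) {
            // "positionHistory" is an assumed key name for the per-stage positions.
            System.out.println(token.get("text") + " -> " + token.get("positionHistory"));
        }
    }
}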

Aggregations

FieldAnalysisRequest (org.apache.solr.client.solrj.request.FieldAnalysisRequest): 8 usages
Test (org.junit.Test): 6 usages
ArrayList (java.util.ArrayList): 5 usages
List (java.util.List): 5 usages
NamedList (org.apache.solr.common.util.NamedList): 5 usages
IOException (java.io.IOException): 2 usages
MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 2 usages
SolrException (org.apache.solr.common.SolrException): 2 usages
Reader (java.io.Reader): 1 usage
Analyzer (org.apache.lucene.analysis.Analyzer): 1 usage
TokenStream (org.apache.lucene.analysis.TokenStream): 1 usage
Tokenizer (org.apache.lucene.analysis.Tokenizer): 1 usage
WhitespaceTokenizer (org.apache.lucene.analysis.core.WhitespaceTokenizer): 1 usage
TokenFilterFactory (org.apache.lucene.analysis.util.TokenFilterFactory): 1 usage
TokenizerFactory (org.apache.lucene.analysis.util.TokenizerFactory): 1 usage
AttributeFactory (org.apache.lucene.util.AttributeFactory): 1 usage
TokenizerChain (org.apache.solr.analysis.TokenizerChain): 1 usage
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 1 usage
SolrParams (org.apache.solr.common.params.SolrParams): 1 usage
ContentStream (org.apache.solr.common.util.ContentStream): 1 usage