Search in sources :

Example 11 with CharsRef

use of org.apache.lucene.util.CharsRef in project lucene-solr by apache.

the class BaseSynonymParserTestCase method assertEntryEquals.

/**
 * Asserts that a parsed synonym map contains the expected entry for a single input word.
 *
 * <p>The entry is looked up in the map's FST, its header is decoded (the low bit carries the
 * keep-original flag, the remaining bits the number of outputs), and every stored output is
 * checked for membership in the expected set. Only the output count and per-output membership
 * are verified.
 *
 * @param synonynMap  the synonym map produced by the parser under test
 * @param word        the input word (or phrase) to look up, as it comes out of the analyzer;
 *                    every space is replaced by {@link SynonymMap#WORD_SEPARATOR} before lookup
 * @param includeOrig whether the entry is expected to keep the original token
 * @param synonyms    the expected synonyms, written with a single space where the map stores
 *                    a word separator
 * @throws Exception if reading the map fails
 */
public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
    // The map stores multi-word inputs joined by the separator character, not by spaces.
    word = word.replace(' ', SynonymMap.WORD_SEPARATOR);

    final CharsRef lookupKey = new CharsRef(word);
    final IntsRefBuilder utf32Scratch = new IntsRefBuilder();
    final BytesRef entry = Util.get(synonynMap.fst, Util.toUTF32(lookupKey, utf32Scratch));
    assertNotNull("No synonyms found for: " + word, entry);

    final ByteArrayDataInput entryReader = new ByteArrayDataInput(entry.bytes, entry.offset, entry.length);

    // Header vInt: bit 0 clear means the original is kept; the upper bits are the output count.
    final int header = entryReader.readVInt();
    final boolean keepOrig = (header & 0x1) == 0;
    assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig, includeOrig, keepOrig);

    final int count = header >>> 1;
    assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count, synonyms.length, count);

    final Set<String> expected = new HashSet<>(Arrays.asList(synonyms));
    final BytesRef wordBytes = new BytesRef();
    for (int remaining = count; remaining > 0; remaining--) {
        // Each following vInt is an ordinal into the map's word table.
        final int ordinal = entryReader.readVInt();
        synonynMap.words.get(ordinal, wordBytes);
        final String synonym = wordBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
        assertTrue("Unexpected synonym found: " + synonym, expected.contains(synonym));
    }
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef) CharsRef(org.apache.lucene.util.CharsRef) HashSet(java.util.HashSet)

Example 12 with CharsRef

use of org.apache.lucene.util.CharsRef in project lucene-solr by apache.

the class CompletionTokenStreamTest method testWithSynonyms.

/**
 * Feeds a three-word input, whose first word has one synonym, through a synonym filter and a
 * completion token stream, and expects two outputs: one starting with the original word and
 * one starting with the synonym, both carrying the configured payload.
 */
@Test
public void testWithSynonyms() throws Exception {
    SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
    synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader("mykeyword another keyword"));
    SynonymFilter synonymFilter = new SynonymFilter(tokenizer, synonyms.build(), true);

    BytesRef payload = new BytesRef("payload");
    CompletionTokenStream completions = new CompletionTokenStream(synonymFilter, true, false, 100);
    completions.setPayload(payload);
    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completions);

    // Both expected outputs share the tail "another<SEP>keyword"; only the first term differs.
    final char separator = (char) CompletionAnalyzer.SEP_LABEL;
    final String[] leadingTerms = { "mykeyword", "mysynonym" };
    final String[] expectedOutputs = new String[leadingTerms.length];
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (int i = 0; i < leadingTerms.length; i++) {
        scratch.clear();
        scratch.append(leadingTerms[i]);
        scratch.append(separator);
        scratch.append("another");
        scratch.append(separator);
        scratch.append("keyword");
        expectedOutputs[i] = scratch.toCharsRef().toString();
    }

    final String payloadText = payload.utf8ToString();
    assertTokenStreamContents(stream, expectedOutputs, null, null, new String[] { payloadText, payloadText }, new int[] { 1, 1 }, null, null);
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SynonymFilter(org.apache.lucene.analysis.synonym.SynonymFilter) CharsRef(org.apache.lucene.util.CharsRef) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) StringReader(java.io.StringReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) BytesRef(org.apache.lucene.util.BytesRef) Test(org.junit.Test)

Example 13 with CharsRef

use of org.apache.lucene.util.CharsRef in project lucene-solr by apache.

the class CompletionTokenStreamTest method testWithSynonym.

/**
 * Runs a single word that has one synonym through a synonym filter and a completion token
 * stream, and expects the original word and the synonym as outputs, each carrying the
 * configured payload.
 */
@Test
public void testWithSynonym() throws Exception {
    SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
    synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

    Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    source.setReader(new StringReader("mykeyword"));
    SynonymFilter synonymFilter = new SynonymFilter(source, synonyms.build(), true);

    CompletionTokenStream completions = new CompletionTokenStream(synonymFilter);
    BytesRef payload = new BytesRef("payload");
    completions.setPayload(payload);
    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completions);

    final String[] expectedTerms = { "mykeyword", "mysynonym" };
    final String payloadText = payload.utf8ToString();
    assertTokenStreamContents(stream, expectedTerms, null, null, new String[] { payloadText, payloadText }, new int[] { 1, 1 }, null, null);
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) StringReader(java.io.StringReader) SynonymFilter(org.apache.lucene.analysis.synonym.SynonymFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer) CharsRef(org.apache.lucene.util.CharsRef) BytesRef(org.apache.lucene.util.BytesRef) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) Test(org.junit.Test)

Example 14 with CharsRef

use of org.apache.lucene.util.CharsRef in project lucene-solr by apache.

the class CompletionTokenStreamTest method testValidNumberOfExpansions.

/**
 * Checks how many tokens the completion token stream emits for an input in which every
 * word has a synonym.
 *
 * <p>Synonyms are registered for the words "1" through "256" and the input consists of the
 * words "1" through "8". The stream must emit exactly 256 non-empty tokens, and the sum of
 * their position increments must equal the token count.
 *
 * @throws IOException if building the synonym map or consuming the stream fails
 */
@Test
public void testValidNumberOfExpansions() throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    for (int n = 1; n <= 256; n++) {
        builder.add(new CharsRef("" + n), new CharsRef("" + (1000 + n)), true);
    }

    // Input text: "1 2 3 4 5 6 7 8 " (each word followed by a space).
    StringBuilder valueBuilder = new StringBuilder();
    for (int n = 1; n <= 8; n++) {
        valueBuilder.append(n);
        valueBuilder.append(" ");
    }

    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader(valueBuilder.toString()));
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
    CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
    completionTokenStream.setPayload(new BytesRef());
    PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);

    stream.reset();
    CompletionTokenStream.BytesRefBuilderTermAttribute attr = stream.addAttribute(CompletionTokenStream.BytesRefBuilderTermAttribute.class);
    PositionIncrementAttribute posAttr = stream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while (stream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    stream.close();

    // Expected value goes first so a failure reports "expected:<256> but was:<count>".
    // NOTE(review): 256 is presumably 2^8 (8 input words, each with two alternatives) - confirm.
    assertEquals(256, count);
    assertEquals(count, maxPos);
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SynonymFilter(org.apache.lucene.analysis.synonym.SynonymFilter) CharsRef(org.apache.lucene.util.CharsRef) SynonymMap(org.apache.lucene.analysis.synonym.SynonymMap) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) StringReader(java.io.StringReader) BytesRef(org.apache.lucene.util.BytesRef) Test(org.junit.Test)

Example 15 with CharsRef

use of org.apache.lucene.util.CharsRef in project lucene-solr by apache.

the class SuggestComponent method process.

/**
 * Runs the selected suggesters against the request's query and adds their results to the
 * response.
 *
 * <p>Nothing is done when the component is not enabled in the request parameters or when no
 * suggesters are registered. The query text is taken from the suggest query parameter, then
 * from the response builder's query string, then from the common {@code q} parameter; if all
 * three are absent no suggestions are produced. A failure to resolve the suggesters is only
 * tolerated when the request asks to build or reload all suggesters, in which case an empty
 * suggester set is used.
 *
 * @param rb the response builder carrying the request and the response to populate
 * @throws IOException if a suggester fails while producing suggestions
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    final SolrParams params = rb.req.getParams();
    LOG.info("SuggestComponent process with : " + params);

    final boolean enabled = params.getBool(COMPONENT_NAME, false);
    if (!enabled || suggesters.isEmpty()) {
        return;
    }

    final boolean buildAll = params.getBool(SUGGEST_BUILD_ALL, false);
    final boolean reloadAll = params.getBool(SUGGEST_RELOAD_ALL, false);

    Set<SolrSuggester> querySuggesters;
    try {
        querySuggesters = getSuggesters(params);
    } catch (SolrException ex) {
        // Build-all / reload-all requests may proceed without a resolvable suggester set.
        if (buildAll || reloadAll) {
            querySuggesters = new HashSet<>();
        } else {
            throw ex;
        }
    }

    // Resolve the query text: explicit suggest query, then the builder's query, then "q".
    String query = params.get(SUGGEST_Q);
    if (query == null) {
        query = rb.getQueryString();
    }
    if (query == null) {
        query = params.get(CommonParams.Q);
    }
    if (query == null) {
        return;
    }

    final int count = params.getInt(SUGGEST_COUNT, 1);
    final boolean highlight = params.getBool(SUGGEST_HIGHLIGHT, false);
    final boolean allTermsRequired = params.getBool(SUGGEST_ALL_TERMS_REQUIRED, true);

    // A blank context filter is treated the same as an absent one.
    String contextFilter = params.get(SUGGEST_CONTEXT_FILTER_QUERY);
    if (contextFilter != null) {
        contextFilter = contextFilter.trim();
        if (contextFilter.isEmpty()) {
            contextFilter = null;
        }
    }

    final SuggesterOptions options = new SuggesterOptions(new CharsRef(query), count, contextFilter, allTermsRequired, highlight);
    final Map<String, SimpleOrderedMap<NamedList<Object>>> namedListResults = new HashMap<>();
    for (SolrSuggester suggester : querySuggesters) {
        final SuggesterResult result = suggester.getSuggestions(options);
        toNamedList(result, namedListResults);
    }
    rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResults);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SuggesterOptions(org.apache.solr.spelling.suggest.SuggesterOptions) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) CharsRef(org.apache.lucene.util.CharsRef) SuggesterResult(org.apache.solr.spelling.suggest.SuggesterResult) SolrParams(org.apache.solr.common.params.SolrParams) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrSuggester(org.apache.solr.spelling.suggest.SolrSuggester) SolrException(org.apache.solr.common.SolrException) HashSet(java.util.HashSet)

Aggregations

CharsRef (org.apache.lucene.util.CharsRef)27 BytesRef (org.apache.lucene.util.BytesRef)8 ArrayList (java.util.ArrayList)6 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)6 IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)6 StringReader (java.io.StringReader)4 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)4 SynonymFilter (org.apache.lucene.analysis.synonym.SynonymFilter)4 SynonymMap (org.apache.lucene.analysis.synonym.SynonymMap)4 HashMap (java.util.HashMap)3 TokenStream (org.apache.lucene.analysis.TokenStream)3 IntsRef (org.apache.lucene.util.IntsRef)3 Test (org.junit.Test)3 ParseException (java.text.ParseException)2 HashSet (java.util.HashSet)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 Tokenizer (org.apache.lucene.analysis.Tokenizer)2