Use of org.apache.lucene.util.CharsRef in the lucene-solr project (Apache): class BaseSynonymParserTestCase, method assertEntryEquals.
/**
 * Helper method to validate synonym parsing.
 *
 * @param synonynMap the generated synonym map after parsing
 * @param word word (phrase) we are validating the synonyms for. Should be the value that comes out
 *             of the analyzer; all spaces are replaced by word separators before the FST lookup.
 * @param includeOrig if synonyms should include original
 * @param synonyms actual synonyms. All word separators are replaced with a single space.
 */
public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
  word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  // Look the (separator-joined) phrase up in the map's FST.
  BytesRef entry = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
  assertNotNull("No synonyms found for: " + word, entry);

  ByteArrayDataInput in = new ByteArrayDataInput(entry.bytes, entry.offset, entry.length);
  // The first vInt packs the keep-original flag (low bit, inverted) and the synonym count.
  final int header = in.readVInt();
  final boolean keepOrig = (header & 0x1) == 0;
  assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig, includeOrig, keepOrig);
  final int count = header >>> 1;
  assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count, synonyms.length, count);

  Set<String> expected = new HashSet<>(Arrays.asList(synonyms));
  BytesRef scratch = new BytesRef();
  int remaining = count;
  while (remaining-- > 0) {
    // Each stored ord resolves to a synonym; map separators back to spaces before comparing.
    synonynMap.words.get(in.readVInt(), scratch);
    String synonym = scratch.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
    assertTrue("Unexpected synonym found: " + synonym, expected.contains(synonym));
  }
}
Use of org.apache.lucene.util.CharsRef in the lucene-solr project (Apache): class CompletionTokenStreamTest, method testWithSynonyms.
@Test
public void testWithSynonyms() throws Exception {
  // Map "mykeyword" -> "mysynonym", keeping the original term.
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader("mykeyword another keyword"));
  SynonymFilter synonymFilter = new SynonymFilter(tokenizer, builder.build(), true);

  BytesRef payload = new BytesRef("payload");
  CompletionTokenStream completionTokenStream = new CompletionTokenStream(synonymFilter, true, false, 100);
  completionTokenStream.setPayload(payload);
  PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);

  // Expect one full path per synonym alternative, tokens joined by the completion separator.
  char sep = (char) CompletionAnalyzer.SEP_LABEL;
  String[] expectedOutputs = {
      "mykeyword" + sep + "another" + sep + "keyword",
      "mysynonym" + sep + "another" + sep + "keyword"
  };
  assertTokenStreamContents(stream, expectedOutputs, null, null,
      new String[] { payload.utf8ToString(), payload.utf8ToString() }, new int[] { 1, 1 }, null, null);
}
Use of org.apache.lucene.util.CharsRef in the lucene-solr project (Apache): class CompletionTokenStreamTest, method testWithSynonym.
@Test
public void testWithSynonym() throws Exception {
  // Single-token input with one synonym: both alternatives must be emitted.
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader("mykeyword"));
  SynonymFilter synonymFilter = new SynonymFilter(tokenizer, builder.build(), true);

  CompletionTokenStream completionTokenStream = new CompletionTokenStream(synonymFilter);
  BytesRef payload = new BytesRef("payload");
  completionTokenStream.setPayload(payload);
  PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);

  String[] expectedTerms = { "mykeyword", "mysynonym" };
  String[] expectedTypes = { payload.utf8ToString(), payload.utf8ToString() };
  assertTokenStreamContents(stream, expectedTerms, null, null, expectedTypes, new int[] { 1, 1 }, null, null);
}
Use of org.apache.lucene.util.CharsRef in the lucene-solr project (Apache): class CompletionTokenStreamTest, method testValidNumberOfExpansions.
@Test
public void testValidNumberOfExpansions() throws IOException {
  // Give every numeric token 1..256 a distinct synonym, so each input position
  // has exactly two alternatives.
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  for (int i = 0; i < 256; i++) {
    builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
  }
  // Input of 8 tokens ("1 2 ... 8") -> 2^8 = 256 expanded completion paths.
  StringBuilder valueBuilder = new StringBuilder();
  for (int i = 0; i < 8; i++) {
    valueBuilder.append(i + 1);
    valueBuilder.append(" ");
  }
  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader(valueBuilder.toString()));
  SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
  CompletionTokenStream completionTokenStream = new CompletionTokenStream(filter);
  completionTokenStream.setPayload(new BytesRef());
  PayloadAttrToTypeAttrFilter stream = new PayloadAttrToTypeAttrFilter(completionTokenStream);
  stream.reset();
  CompletionTokenStream.BytesRefBuilderTermAttribute attr = stream.addAttribute(CompletionTokenStream.BytesRefBuilderTermAttribute.class);
  PositionIncrementAttribute posAttr = stream.addAttribute(PositionIncrementAttribute.class);
  int maxPos = 0;
  int count = 0;
  while (stream.incrementToken()) {
    count++;
    assertNotNull(attr.getBytesRef());
    assertTrue(attr.getBytesRef().length > 0);
    maxPos += posAttr.getPositionIncrement();
  }
  stream.close();
  // Fix: JUnit's assertEquals takes (expected, actual); the original call had
  // them reversed, which would produce a misleading failure message.
  assertEquals(256, count);
  // Each emitted path carries a position increment of 1, so the accumulated
  // position total must equal the path count.
  assertEquals(count, maxPos);
}
Use of org.apache.lucene.util.CharsRef in the lucene-solr project (Apache): class SuggestComponent, method process.
/**
 * Responsible for using the specified suggester to get the suggestions
 * for the query and write the results
 * */
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  LOG.info("SuggestComponent process with : " + params);
  // Component disabled or nothing registered: nothing to do.
  if (!params.getBool(COMPONENT_NAME, false) || suggesters.isEmpty()) {
    return;
  }
  boolean buildAll = params.getBool(SUGGEST_BUILD_ALL, false);
  boolean reloadAll = params.getBool(SUGGEST_RELOAD_ALL, false);
  Set<SolrSuggester> querySuggesters;
  try {
    querySuggesters = getSuggesters(params);
  } catch (SolrException ex) {
    // When a build/reload of all suggesters was requested, a lookup failure is
    // tolerated and we proceed with an empty set; otherwise propagate the error.
    if (!buildAll && !reloadAll) {
      throw ex;
    } else {
      querySuggesters = new HashSet<>();
    }
  }
  // Query string resolution order: suggest.q param, then the response builder's
  // query string, then the common "q" param.
  String query = params.get(SUGGEST_Q);
  if (query == null) {
    query = rb.getQueryString();
    if (query == null) {
      query = params.get(CommonParams.Q);
    }
  }
  if (query != null) {
    int count = params.getInt(SUGGEST_COUNT, 1);
    boolean highlight = params.getBool(SUGGEST_HIGHLIGHT, false);
    boolean allTermsRequired = params.getBool(SUGGEST_ALL_TERMS_REQUIRED, true);
    String contextFilter = params.get(SUGGEST_CONTEXT_FILTER_QUERY);
    if (contextFilter != null) {
      // Treat a blank context filter the same as an absent one.
      contextFilter = contextFilter.trim();
      if (contextFilter.length() == 0) {
        contextFilter = null;
      }
    }
    SuggesterOptions options = new SuggesterOptions(new CharsRef(query), count, contextFilter, allTermsRequired, highlight);
    // Collect each suggester's results into one named-list structure for the response.
    Map<String, SimpleOrderedMap<NamedList<Object>>> namedListResults = new HashMap<>();
    for (SolrSuggester suggester : querySuggesters) {
      SuggesterResult suggesterResult = suggester.getSuggestions(options);
      toNamedList(suggesterResult, namedListResults);
    }
    rb.rsp.add(SuggesterResultLabels.SUGGEST, namedListResults);
  }
}
Aggregations