use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
the class CustomPassageFormatterTests method testHtmlEncodeFormat.
public void testHtmlEncodeFormat() {
String content = "<b>This is a really cool highlighter.</b> Postings highlighter gives nice snippets back.";
CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
Passage[] passages = new Passage[2];
String match = "highlighter";
BytesRef matchBytesRef = new BytesRef(match);
Passage passage1 = new Passage();
int start = content.indexOf(match);
int end = start + match.length();
passage1.startOffset = 0;
//lets include the whitespace at the end to make sure we trim it
passage1.endOffset = end + 6;
passage1.addMatch(start, end, matchBytesRef);
passages[0] = passage1;
Passage passage2 = new Passage();
start = content.lastIndexOf(match);
end = start + match.length();
passage2.startOffset = passage1.endOffset;
passage2.endOffset = content.length();
passage2.addMatch(start, end, matchBytesRef);
passages[1] = passage2;
Snippet[] fragments = passageFormatter.format(passages, content);
assertThat(fragments, notNullValue());
assertThat(fragments.length, equalTo(2));
assertThat(fragments[0].getText(), equalTo("<b>This is a really cool <em>highlighter</em>.</b>"));
assertThat(fragments[1].getText(), equalTo("Postings <em>highlighter</em> gives nice snippets back."));
}
use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
the class NoisyChannelSpellCheckerTests method testMultiGenerator.
public void testMultiGenerator() throws IOException {
RAMDirectory dir = new RAMDirectory();
Map<String, Analyzer> mapping = new HashMap<>();
mapping.put("body_ngram", new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
ShingleFilter tf = new ShingleFilter(t, 2, 3);
tf.setOutputUnigrams(false);
return new TokenStreamComponents(t, new LowerCaseFilter(tf));
}
});
mapping.put("body", new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t, new LowerCaseFilter(t));
}
});
mapping.put("body_reverse", new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer t = new StandardTokenizer();
return new TokenStreamComponents(t, new ReverseStringFilter(new LowerCaseFilter(t)));
}
});
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
IndexWriterConfig conf = new IndexWriterConfig(wrapper);
IndexWriter writer = new IndexWriter(dir, conf);
String[] strings = new String[] { "Xorr the God-Jewel", "Grog the God-Crusher", "Xorn", "Walter Newell", "Wanda Maximoff", "Captain America", "American Ace", "Wundarr the Aquarian", "Will o' the Wisp", "Xemnu the Titan", "Fantastic Four", "Quasar", "Quasar II" };
for (String line : strings) {
Document doc = new Document();
doc.add(new Field("body", line, TextField.TYPE_NOT_STORED));
doc.add(new Field("body_reverse", line, TextField.TYPE_NOT_STORED));
doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED));
writer.addDocument(doc);
}
DirectoryReader ir = DirectoryReader.open(writer);
LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1);
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
// only use forward with constant prefix
assertThat(corrections.length, equalTo(0));
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("four the god jewel"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
// Test a special case where one of the suggest term is unchanged by the postFilter, 'II' here is unchanged by the reverse analyzer.
corrections = suggester.getCorrections(wrapper, new BytesRef("Quazar II"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("quasar ii"));
}
use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
the class PercolatorFieldMapperTests method testMultiplePercolatorFields.
// multiple percolator fields are allowed in the mapping, but only one field can be used at index time.
public void testMultiplePercolatorFields() throws Exception {
String typeName = "another_type";
String percolatorMapper = XContentFactory.jsonBuilder().startObject().startObject(typeName).startObject("_field_names").field("enabled", false).endObject().startObject("properties").startObject("query_field1").field("type", "percolator").endObject().startObject("query_field2").field("type", "percolator").endObject().endObject().endObject().endObject().string();
mapperService.merge(typeName, new CompressedXContent(percolatorMapper), MapperService.MergeReason.MAPPING_UPDATE, true);
QueryBuilder queryBuilder = matchQuery("field", "value");
ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", jsonBuilder().startObject().field("query_field1", queryBuilder).field("query_field2", queryBuilder).endObject().bytes());
// also includes all other meta fields
assertThat(doc.rootDoc().getFields().size(), equalTo(14));
BytesRef queryBuilderAsBytes = doc.rootDoc().getField("query_field1.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
queryBuilderAsBytes = doc.rootDoc().getField("query_field2.query_builder_field").binaryValue();
assertQueryBuilder(queryBuilderAsBytes, queryBuilder);
}
use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
the class PercolatorFieldMapperTests method testPercolatorFieldMapper.
public void testPercolatorFieldMapper() throws Exception {
addQueryMapping();
QueryBuilder queryBuilder = termQuery("field", "value");
ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject().field(fieldName, queryBuilder).endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name())[0].binaryValue().utf8ToString(), equalTo("field\0value"));
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(), equalTo(EXTRACTION_COMPLETE));
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
// add an query for which we don't extract terms from
queryBuilder = rangeQuery("field").from("a").to("z");
doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject().field(fieldName, queryBuilder).endObject().bytes());
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name()).length, equalTo(1));
assertThat(doc.rootDoc().getFields(fieldType.extractionResultField.name())[0].stringValue(), equalTo(EXTRACTION_FAILED));
assertThat(doc.rootDoc().getFields(fieldType.queryTermsField.name()).length, equalTo(0));
assertThat(doc.rootDoc().getFields(fieldType.queryBuilderField.name()).length, equalTo(1));
qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, queryBuilder);
}
use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.
the class PercolatorFieldMapperTests method testStoringQueries.
public void testStoringQueries() throws Exception {
addQueryMapping();
QueryBuilder[] queries = new QueryBuilder[] { termQuery("field", "value"), matchAllQuery(), matchQuery("field", "value"), matchPhraseQuery("field", "value"), prefixQuery("field", "v"), wildcardQuery("field", "v*"), rangeQuery("number_field").gte(0).lte(9), rangeQuery("date_field").from("2015-01-01T00:00").to("2015-01-01T00:00") };
for (QueryBuilder query : queries) {
ParsedDocument doc = mapperService.documentMapper(typeName).parse("test", typeName, "1", XContentFactory.jsonBuilder().startObject().field(fieldName, query).endObject().bytes());
BytesRef qbSource = doc.rootDoc().getFields(fieldType.queryBuilderField.name())[0].binaryValue();
assertQueryBuilder(qbSource, query);
}
}
Aggregations