use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.
the class HighlightField method writeTo.
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
if (fragments == null) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeVInt(fragments.length);
for (Text fragment : fragments) {
out.writeText(fragment);
}
}
}
use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.
the class PhraseSuggester method innerExecute.
/*
* More Ideas:
* - add ability to find whitespace problems -> we can build a poor mans decompounder with our index based on a automaton?
* - add ability to build different error models maybe based on a confusion matrix?
* - try to combine a token with its subsequent token to find / detect word splits (optional)
* - for this to work we need some way to defined the position length of a candidate
* - phonetic filters could be interesting here too for candidate selection
*/
@Override
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
final IndexReader indexReader = searcher.getIndexReader();
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
final int numGenerators = generators.size();
final List<CandidateGenerator> gens = new ArrayList<>(generators.size());
for (int i = 0; i < numGenerators; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker();
Terms terms = MultiFields.getTerms(indexReader, generator.field());
if (terms != null) {
gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter(), terms));
}
}
final String suggestField = suggestion.getField();
final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField);
if (gens.size() > 0 && suggestTerms != null) {
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
final BytesRef separator = suggestion.separator();
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
Result checkerResult;
try (TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(), suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
}
PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
response.addTerm(resultEntry);
final BytesRefBuilder byteSpare = new BytesRefBuilder();
final Function<Map<String, Object>, ExecutableScript> collateScript = suggestion.getCollateQueryScript();
final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
for (int i = 0; i < checkerResult.corrections.length; i++) {
Correction correction = checkerResult.corrections[i];
spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, null, null));
boolean collateMatch = true;
if (collateScript != null) {
// Checks if the template query collateScript yields any documents
// from the index for a correction, collateMatch is updated
final Map<String, Object> vars = suggestion.getCollateScriptParams();
vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString());
QueryShardContext shardContext = suggestion.getShardContext();
final ExecutableScript executable = collateScript.apply(vars);
final BytesReference querySource = (BytesReference) executable.run();
try (XContentParser parser = XContentFactory.xContent(querySource).createParser(shardContext.getXContentRegistry(), querySource)) {
QueryBuilder innerQueryBuilder = shardContext.newParseContext(parser).parseInnerQueryBuilder();
final ParsedQuery parsedQuery = shardContext.toQuery(innerQueryBuilder);
collateMatch = Lucene.exists(searcher, parsedQuery.query());
}
}
if (!collateMatch && !collatePrune) {
continue;
}
Text phrase = new Text(spare.toString());
Text highlighted = null;
if (suggestion.getPreTag() != null) {
spare.copyUTF8Bytes(correction.join(SEPARATOR, byteSpare, suggestion.getPreTag(), suggestion.getPostTag()));
highlighted = new Text(spare.toString());
}
if (collatePrune) {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score), collateMatch));
} else {
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
}
}
} else {
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
}
return response;
}
use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.
the class ExpandSearchPhaseTests method testFailOneItemFailsEntirePhase.
public void testFailOneItemFailsEntirePhase() throws IOException {
AtomicBoolean executedMultiSearch = new AtomicBoolean(false);
SearchHits collapsedHits = new SearchHits(new SearchHit[] { new SearchHit(2, "ID", new Text("type"), Collections.emptyMap()), new SearchHit(3, "ID", new Text("type"), Collections.emptyMap()) }, 1, 1.0F);
MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(1);
String collapseValue = randomBoolean() ? null : "boom";
mockSearchPhaseContext.getRequest().source(new SearchSourceBuilder().collapse(new CollapseBuilder("someField").setInnerHits(new InnerHitBuilder().setName("foobarbaz"))));
mockSearchPhaseContext.searchTransport = new SearchTransportService(Settings.builder().put("search.remote.connect", false).build(), null, null) {
@Override
void sendExecuteMultiSearch(MultiSearchRequest request, SearchTask task, ActionListener<MultiSearchResponse> listener) {
assertTrue(executedMultiSearch.compareAndSet(false, true));
InternalSearchResponse internalSearchResponse = new InternalSearchResponse(collapsedHits, null, null, null, false, null, 1);
SearchResponse response = mockSearchPhaseContext.buildSearchResponse(internalSearchResponse, null);
listener.onResponse(new MultiSearchResponse(new MultiSearchResponse.Item[] { new MultiSearchResponse.Item(null, new RuntimeException("boom")), new MultiSearchResponse.Item(response, null) }));
}
};
SearchHits hits = new SearchHits(new SearchHit[] { new SearchHit(1, "ID", new Text("type"), Collections.singletonMap("someField", new SearchHitField("someField", Collections.singletonList(collapseValue)))), new SearchHit(2, "ID2", new Text("type"), Collections.singletonMap("someField", new SearchHitField("someField", Collections.singletonList(collapseValue)))) }, 1, 1.0F);
InternalSearchResponse internalSearchResponse = new InternalSearchResponse(hits, null, null, null, false, null, 1);
SearchResponse response = mockSearchPhaseContext.buildSearchResponse(internalSearchResponse, null);
AtomicReference<SearchResponse> reference = new AtomicReference<>();
ExpandSearchPhase phase = new ExpandSearchPhase(mockSearchPhaseContext, response, r -> new SearchPhase("test") {
@Override
public void run() throws IOException {
reference.set(r);
}
});
phase.run();
assertThat(mockSearchPhaseContext.phaseFailure.get(), Matchers.instanceOf(RuntimeException.class));
assertEquals("boom", mockSearchPhaseContext.phaseFailure.get().getMessage());
assertNotNull(mockSearchPhaseContext.phaseFailure.get());
assertNull(reference.get());
assertEquals(0, mockSearchPhaseContext.phasesExecuted.get());
}
use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.
the class ExpandSearchPhaseTests method testSkipPhase.
public void testSkipPhase() throws IOException {
MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(1);
mockSearchPhaseContext.searchTransport = new SearchTransportService(Settings.builder().put("search.remote.connect", false).build(), null, null) {
@Override
void sendExecuteMultiSearch(MultiSearchRequest request, SearchTask task, ActionListener<MultiSearchResponse> listener) {
fail("no collapsing here");
}
};
SearchHits hits = new SearchHits(new SearchHit[] { new SearchHit(1, "ID", new Text("type"), Collections.singletonMap("someField", new SearchHitField("someField", Collections.singletonList(null)))), new SearchHit(2, "ID2", new Text("type"), Collections.singletonMap("someField", new SearchHitField("someField", Collections.singletonList(null)))) }, 1, 1.0F);
InternalSearchResponse internalSearchResponse = new InternalSearchResponse(hits, null, null, null, false, null, 1);
SearchResponse response = mockSearchPhaseContext.buildSearchResponse(internalSearchResponse, null);
AtomicReference<SearchResponse> reference = new AtomicReference<>();
ExpandSearchPhase phase = new ExpandSearchPhase(mockSearchPhaseContext, response, r -> new SearchPhase("test") {
@Override
public void run() throws IOException {
reference.set(r);
}
});
phase.run();
mockSearchPhaseContext.assertNoFailure();
assertNotNull(reference.get());
assertEquals(1, mockSearchPhaseContext.phasesExecuted.get());
}
use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.
the class SearchPhaseControllerTests method generateQueryResults.
private AtomicArray<QuerySearchResultProvider> generateQueryResults(int nShards, List<CompletionSuggestion> suggestions, int searchHitsSize, boolean useConstantScore) {
AtomicArray<QuerySearchResultProvider> queryResults = new AtomicArray<>(nShards);
for (int shardIndex = 0; shardIndex < nShards; shardIndex++) {
QuerySearchResult querySearchResult = new QuerySearchResult(shardIndex, new SearchShardTarget("", new Index("", ""), shardIndex));
TopDocs topDocs = new TopDocs(0, new ScoreDoc[0], 0);
if (searchHitsSize > 0) {
int nDocs = randomIntBetween(0, searchHitsSize);
ScoreDoc[] scoreDocs = new ScoreDoc[nDocs];
float maxScore = 0F;
for (int i = 0; i < nDocs; i++) {
float score = useConstantScore ? 1.0F : Math.abs(randomFloat());
scoreDocs[i] = new ScoreDoc(i, score);
if (score > maxScore) {
maxScore = score;
}
}
topDocs = new TopDocs(scoreDocs.length, scoreDocs, maxScore);
}
List<CompletionSuggestion> shardSuggestion = new ArrayList<>();
for (CompletionSuggestion completionSuggestion : suggestions) {
CompletionSuggestion suggestion = new CompletionSuggestion(completionSuggestion.getName(), completionSuggestion.getSize());
final CompletionSuggestion.Entry completionEntry = new CompletionSuggestion.Entry(new Text(""), 0, 5);
suggestion.addTerm(completionEntry);
int optionSize = randomIntBetween(1, suggestion.getSize());
float maxScore = randomIntBetween(suggestion.getSize(), (int) Float.MAX_VALUE);
for (int i = 0; i < optionSize; i++) {
completionEntry.addOption(new CompletionSuggestion.Entry.Option(i, new Text(""), maxScore, Collections.emptyMap()));
float dec = randomIntBetween(0, optionSize);
if (dec <= maxScore) {
maxScore -= dec;
}
}
suggestion.setShardIndex(shardIndex);
shardSuggestion.add(suggestion);
}
querySearchResult.topDocs(topDocs, null);
querySearchResult.size(searchHitsSize);
querySearchResult.suggest(new Suggest(new ArrayList<>(shardSuggestion)));
queryResults.set(shardIndex, querySearchResult);
}
return queryResults;
}
Aggregations