Search in sources :

Example 1 with CharsRefBuilder

use of org.apache.lucene.util.CharsRefBuilder in project elasticsearch by elastic.

the class CompletionFieldMapperTests method testFieldValueValidation.

public void testFieldValueValidation() throws Exception {
    String mapping = jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("completion").field("type", "completion").endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse("type1", new CompressedXContent(mapping));
    CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
    charsRefBuilder.append("sugg");
    charsRefBuilder.setCharAt(2, '');
    try {
        defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder().startObject().field("completion", charsRefBuilder.get().toString()).endObject().bytes());
        fail("No error indexing value with reserved character [0x1F]");
    } catch (MapperParsingException e) {
        Throwable cause = e.unwrapCause().getCause();
        assertThat(cause, instanceOf(IllegalArgumentException.class));
        assertThat(cause.getMessage(), containsString("[0x1f]"));
    }
    charsRefBuilder.setCharAt(2, '');
    try {
        defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder().startObject().field("completion", charsRefBuilder.get().toString()).endObject().bytes());
        fail("No error indexing value with reserved character [0x0]");
    } catch (MapperParsingException e) {
        Throwable cause = e.unwrapCause().getCause();
        assertThat(cause, instanceOf(IllegalArgumentException.class));
        assertThat(cause.getMessage(), containsString("[0x0]"));
    }
    charsRefBuilder.setCharAt(2, '');
    try {
        defaultMapper.parse("test", "type1", "1", XContentFactory.jsonBuilder().startObject().field("completion", charsRefBuilder.get().toString()).endObject().bytes());
        fail("No error indexing value with reserved character [0x1E]");
    } catch (MapperParsingException e) {
        Throwable cause = e.unwrapCause().getCause();
        assertThat(cause, instanceOf(IllegalArgumentException.class));
        assertThat(cause.getMessage(), containsString("[0x1e]"));
    }
}
Also used : CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) Matchers.containsString(org.hamcrest.Matchers.containsString) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder)

Example 2 with CharsRefBuilder

use of org.apache.lucene.util.CharsRefBuilder in project elasticsearch by elastic.

the class XMoreLikeThis method addTermFrequencies.

/**
 * Walks every term of {@code vector} and accumulates its summed posting
 * frequency into {@code termFreqMap}. Terms reported by {@code isNoiseWord}
 * or {@code isSkipTerm} are ignored.
 *
 * @param termFreqMap a Map of terms and their frequencies; updated in place
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms, passed to the skip-term check
 * @throws IOException if enumerating the terms or their postings fails
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum terms = vector.iterator();
    // Reused scratch buffer for decoding each term's UTF-8 bytes.
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (BytesRef bytes = terms.next(); bytes != null; bytes = terms.next()) {
        scratch.copyUTF8Bytes(bytes);
        final String term = scratch.toString();
        // Noise check runs first; the skip-term check only runs for non-noise terms.
        if (isNoiseWord(term) || isSkipTerm(fieldName, term)) {
            continue;
        }

        // Sum the term's frequency over all of its postings.
        final PostingsEnum postings = terms.postings(null);
        int total = 0;
        if (postings != null) {
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                total += postings.freq();
            }
        }

        // First sighting stores the total; later sightings add to it.
        final Int existing = termFreqMap.get(term);
        if (existing != null) {
            existing.x += total;
        } else {
            final Int created = new Int();
            created.x = total;
            termFreqMap.put(term, created);
        }
    }
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 3 with CharsRefBuilder

use of org.apache.lucene.util.CharsRefBuilder in project elasticsearch by elastic.

the class SuggestPhase method execute.

/**
 * Executes every suggestion registered on the search context and stores the
 * collected, non-null results on the context's query result. Returns without
 * doing anything when the context has no suggest section.
 *
 * @param context the search context whose suggestions are executed
 * @throws ElasticsearchException wrapping any {@link IOException} raised while suggesting
 */
@Override
public void execute(SearchContext context) {
    final SuggestionSearchContext suggestContext = context.suggest();
    if (suggestContext == null) {
        return;
    }
    try {
        // One scratch buffer is shared by all suggesters in this phase.
        final CharsRefBuilder scratch = new CharsRefBuilder();
        final List<Suggestion<? extends Entry<? extends Option>>> collected = new ArrayList<>(suggestContext.suggestions().size());
        for (Map.Entry<String, SuggestionSearchContext.SuggestionContext> named : suggestContext.suggestions().entrySet()) {
            final String name = named.getKey();
            final SuggestionSearchContext.SuggestionContext suggestionContext = named.getValue();
            final Suggester<SuggestionContext> suggester = suggestionContext.getSuggester();
            final Suggestion<? extends Entry<? extends Option>> outcome = suggester.execute(name, suggestionContext, context.searcher(), scratch);
            if (outcome == null) {
                continue;
            }
            assert name.equals(outcome.name);
            collected.add(outcome);
        }
        context.queryResult().suggest(new Suggest(collected));
    } catch (IOException e) {
        throw new ElasticsearchException("I/O exception during suggest phase", e);
    }
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) SuggestionContext(org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext) Suggestion(org.elasticsearch.search.suggest.Suggest.Suggestion) Entry(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry) SuggestionContext(org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext) Option(org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) Map(java.util.Map)

Example 4 with CharsRefBuilder

use of org.apache.lucene.util.CharsRefBuilder in project lucene-skos by behas.

the class AbstractSKOSFilter method processTermOnStack.

/**
 * Pops the next expanded term off the stack and emits it in place of the
 * current token: the term is analyzed, the captured state is restored, and the
 * term text, position increment, offsets, SKOS type and payload attributes are
 * overwritten. If analysis throws an {@link IllegalArgumentException} the term
 * is dropped and no attribute is modified.
 *
 * @throws IOException if analyzer failed
 */
protected void processTermOnStack() throws IOException {
    final ExpandedTerm expanded = termStack.pop();
    final SKOSType type = expanded.getTermType();
    final String rawTerm = expanded.getTerm();

    final String analyzedTerm;
    try {
        analyzedTerm = analyze(analyzer, rawTerm, new CharsRefBuilder()).toString();
    } catch (IllegalArgumentException e) {
        // Deliberately ignored: a term that cannot be analyzed is skipped.
        return;
    }

    // Start from the captured state of the original token.
    restoreState(current);

    // Replace the term text with the analyzed expansion.
    termAtt.setEmpty().append(analyzedTerm);

    // A zero increment places the expansion at the same position as the original.
    posIncrAtt.setPositionIncrement(0);

    // Reuse the offsets of the original expression (useful for highlighting),
    // but only when both are non-negative.
    final int start = expanded.getStart();
    final int end = expanded.getEnd();
    if (start >= 0 && end >= 0) {
        offsettAtt.setOffset(start, end);
    }

    // Record the kind of expansion (pref, alt, broader, narrower, etc.).
    skosAtt.setSkosType(type);

    // Encode the SKOS type ordinal as a payload so it is propagated to the index.
    final byte[] encodedType = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(encodedType));
}
Also used : SKOSType(at.ac.univie.mminf.luceneSKOS.analysis.SKOSTypeAttribute.SKOSType) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 5 with CharsRefBuilder

use of org.apache.lucene.util.CharsRefBuilder in project lucene-solr by apache.

the class TestSynonymGraphFilter method assertMapping.

/**
 * Registers a single synonym rule mapping {@code inputString} to
 * {@code outputString}, analyzes {@code inputString} with an analyzer built from
 * that rule, and asserts that the resulting tokens are {@code outputString}
 * followed by {@code inputString}.
 *
 * @param inputString space-separated input words; lowercased before being used as the rule input
 * @param outputString space-separated output words of the rule
 * @throws IOException if building the synonym map or consuming the token stream fails
 */
private void assertMapping(String inputString, String outputString) throws IOException {
    final SynonymMap.Builder synonyms = new SynonymMap.Builder(false);

    // the rules must be lowercased up front, but the incoming tokens will be case insensitive:
    final String[] ruleInputWords = inputString.toLowerCase(Locale.ROOT).split(" ");
    final CharsRef ruleInput = SynonymMap.Builder.join(ruleInputWords, new CharsRefBuilder());

    final String[] ruleOutputWords = outputString.split(" ");
    final CharsRef ruleOutput = SynonymMap.Builder.join(ruleOutputWords, new CharsRefBuilder());

    synonyms.add(ruleInput, ruleOutput, true);

    final Analyzer synonymAnalyzer = new CustomAnalyzer(synonyms.build());
    final TokenStream tokens = synonymAnalyzer.tokenStream("field", inputString);
    final String[] expectedTerms = new String[] { outputString, inputString };
    assertTokenStreamContents(tokens, expectedTerms);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) CharsRef(org.apache.lucene.util.CharsRef)

Aggregations

CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)52 BytesRef (org.apache.lucene.util.BytesRef)30 ArrayList (java.util.ArrayList)11 IOException (java.io.IOException)10 NamedList (org.apache.solr.common.util.NamedList)10 FieldType (org.apache.solr.schema.FieldType)10 TermsEnum (org.apache.lucene.index.TermsEnum)9 SchemaField (org.apache.solr.schema.SchemaField)7 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)6 HashSet (java.util.HashSet)5 Test (org.junit.Test)5 TokenStream (org.apache.lucene.analysis.TokenStream)4 PostingsEnum (org.apache.lucene.index.PostingsEnum)4 Terms (org.apache.lucene.index.Terms)4 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)4 LeafReader (org.apache.lucene.index.LeafReader)3 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)3 CharsRef (org.apache.lucene.util.CharsRef)3 Util (org.apache.lucene.util.fst.Util)3 SolrException (org.apache.solr.common.SolrException)3