Search in sources :

Example 11 with PackedTokenAttributeImpl

use of org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl in project SearchServices by Alfresco.

the class PathTokenFilter method nextToken.

/**
 * Reads characters from {@code input} until the next path separator and returns them as one token.
 * <p>
 * A {@code '/'} outside a namespace section ends a {@code "QNAME"} token (the slash is not part of the
 * term); a {@code ';'} outside a namespace section ends a {@code "LASTQNAME"} token (the semicolon is
 * kept in the term). Characters between {@code nsStartDelimiter} and {@code nsEndDelimiter} never act
 * as separators. When the reader is exhausted the remaining text is returned as a final
 * {@code "QNAME"} token and {@code endOfStream} is set.
 *
 * @return the next token, or {@code null} once the end of the stream has already been reported
 * @throws IOException if reading from {@code input} fails
 * @throws IllegalStateException if the input ends while still inside a namespace section
 */
private PackedTokenAttributeImpl nextToken() throws IOException {
    if (endOfStream) {
        return null;
    }
    final int tokenStart = readerPosition;
    final StringBuilder text = new StringBuilder(64);
    boolean insideNamespace = false;
    for (int read = input.read(); read != -1; read = input.read()) {
        final char ch = (char) read;
        readerPosition++;
        if (ch == nsStartDelimiter) {
            insideNamespace = true;
        } else if (ch == nsEndDelimiter) {
            insideNamespace = false;
        } else if (!insideNamespace) {
            if (ch == '/') {
                // The separator itself is excluded from both the term and the end offset.
                return createPathToken(text.toString(), tokenStart, readerPosition - 1, "QNAME");
            }
            if (ch == ';') {
                // The terminating semicolon is part of the last token.
                text.append(ch);
                return createPathToken(text.toString(), tokenStart, readerPosition, "LASTQNAME");
            }
        }
        text.append(ch);
    }
    // Stop the final token being returned with an end before the start.
    final int tokenEnd = Math.max(tokenStart, readerPosition - 1);
    endOfStream = true;
    if (insideNamespace) {
        throw new IllegalStateException("QName terminated incorrectly: " + text.toString());
    }
    return createPathToken(text.toString(), tokenStart, tokenEnd, "QNAME");
}

/** Creates a token holding {@code term} with the given offsets and type. */
private PackedTokenAttributeImpl createPathToken(String term, int startOffset, int endOffset, String type) {
    PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
    token.setEmpty().append(term);
    token.setOffset(startOffset, endOffset);
    token.setType(type);
    return token;
}
Also used : PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl)

Example 12 with PackedTokenAttributeImpl

use of org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl in project SearchServices by Alfresco.

the class Solr4QueryParser method getToken.

/**
 * Analyses {@code value} with the analyzer obtained from {@code getAnalyzer()} for {@code field} and
 * returns the string form of the last token the stream produces.
 *
 * @param field the field name passed to the analyzer
 * @param value the text to tokenise
 * @param analysisMode not used by this implementation
 * @return the last token's text, or {@code null} if the analyzer produced no tokens
 * @throws ParseException if the token stream fails with an {@link IOException}; the original
 *             exception is attached as the cause
 */
protected String getToken(String field, String value, AnalysisMode analysisMode) throws ParseException {
    try (TokenStream source = getAnalyzer().tokenStream(field, new StringReader(value))) {
        // The TokenStream workflow requires reset() before the first incrementToken().
        source.reset();
        String tokenised = null;
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                token.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                token.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            // Each iteration overwrites the previous value, so the last token wins.
            tokenised = token.toString();
        }
        // Complete the consumer workflow once the stream is exhausted; close() is handled by try-with-resources.
        source.end();
        return tokenised;
    } catch (IOException e) {
        // Keep the historical message but preserve the underlying failure for diagnosis.
        ParseException parseException = new ParseException("IO" + e.getMessage());
        parseException.initCause(e);
        throw parseException;
    }
}
Also used : PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) IOException(java.io.IOException) ParseException(org.apache.lucene.queryparser.classic.ParseException) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 13 with PackedTokenAttributeImpl

use of org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl in project SearchServices by Alfresco.

the class Solr4QueryParser method generateSpanOrQuery.

/**
 * Builds one span query per token sequence and combines the per-sequence queries.
 * <p>
 * For each sequence:
 * <ul>
 * <li>if position increments are enabled and {@code isAllTokensSequentiallyShifted} holds, the tokens
 * become a single in-order {@link SpanNearQuery} with slop 0 (or the lone term query when there is
 * only one token);</li>
 * <li>if position increments are enabled otherwise, tokens are chained with {@link SpanNearQuery}
 * using a slop of {@code (gap - 1) + internalSlop}, where {@code gap} is the position increment of
 * the previously processed token; tokens seen while {@code gap == 0} are collected and OR'ed
 * together before being chained;</li>
 * <li>if position increments are disabled, all tokens of the sequence are OR'ed together.</li>
 * </ul>
 *
 * @param field
 *            the field the generated terms are created for
 * @param fixedTokenSequences
 *            the alternative token sequences, a
 *            {@code LinkedList<LinkedList<PackedTokenAttributeImpl>>}
 * @return the query of the only sequence when exactly one sequence is given, otherwise a
 *         {@link SpanOrQuery} over the queries of all sequences
 */
protected SpanQuery generateSpanOrQuery(String field, LinkedList<LinkedList<PackedTokenAttributeImpl>> fixedTokenSequences) {
    ArrayList<SpanQuery> spanOrQueryParts = new ArrayList<SpanQuery>();
    for (LinkedList<PackedTokenAttributeImpl> tokenSequence : fixedTokenSequences) {
        int gap = 1;
        SpanQuery spanQuery = null;
        List<SpanQuery> atSamePositionSpanOrQueryParts = new ArrayList<SpanQuery>();
        // create flat nearQuery
        if (getEnablePositionIncrements() && isAllTokensSequentiallyShifted(tokenSequence)) {
            // there will be no tokens at same position
            List<SpanQuery> wildWrappedList = new ArrayList<SpanQuery>(tokenSequence.size());
            for (PackedTokenAttributeImpl token : tokenSequence) {
                String termText = token.toString();
                Term term = new Term(field, termText);
                SpanQuery nextSpanQuery = wrapWildcardTerms(term);
                wildWrappedList.add(nextSpanQuery);
            }
            if (wildWrappedList.size() == 1) {
                spanQuery = wildWrappedList.get(0);
            } else {
                spanQuery = new SpanNearQuery(wildWrappedList.toArray(new SpanQuery[wildWrappedList.size()]), 0, true);
            }
        } else {
            // Iterate directly: indexed get(i) on a LinkedList walks the list on every call.
            for (PackedTokenAttributeImpl nextToken : tokenSequence) {
                String termText = nextToken.toString();
                Term term = new Term(field, termText);
                if (getEnablePositionIncrements()) {
                    SpanQuery nextSpanQuery = wrapWildcardTerms(term);
                    if (gap == 0) {
                        // Previous token had a zero increment: collect this one as a same-position alternative.
                        atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                    } else if (atSamePositionSpanOrQueryParts.isEmpty()) {
                        // Nothing pending: chain this token straight onto the query built so far.
                        spanQuery = appendSpanWithGap(spanQuery, nextSpanQuery, gap);
                    } else {
                        // Flush the pending alternatives first; this token starts the next pending group.
                        spanQuery = appendSpanWithGap(spanQuery, collapseSamePositionParts(atSamePositionSpanOrQueryParts), gap);
                        atSamePositionSpanOrQueryParts = new ArrayList<SpanQuery>();
                        atSamePositionSpanOrQueryParts.add(nextSpanQuery);
                    }
                    gap = nextToken.getPositionIncrement();
                } else {
                    SpanQuery nextSpanQuery;
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        org.apache.lucene.search.WildcardQuery wildQuery = new org.apache.lucene.search.WildcardQuery(term);
                        SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery> wrapper = new SpanMultiTermQueryWrapper<org.apache.lucene.search.WildcardQuery>(wildQuery);
                        wrapper.setRewriteMethod(new TopTermsSpanBooleanQueryRewrite(topTermSpanRewriteLimit));
                        nextSpanQuery = wrapper;
                    } else {
                        nextSpanQuery = new SpanTermQuery(term);
                    }
                    if (spanQuery == null) {
                        spanQuery = new SpanOrQuery(nextSpanQuery);
                    } else {
                        spanQuery = new SpanOrQuery(spanQuery, nextSpanQuery);
                    }
                }
            }
        }
        // Flush any alternatives still pending at the end of the sequence.
        if (!atSamePositionSpanOrQueryParts.isEmpty()) {
            spanQuery = appendSpanWithGap(spanQuery, collapseSamePositionParts(atSamePositionSpanOrQueryParts), gap);
        }
        spanOrQueryParts.add(spanQuery);
    }
    if (spanOrQueryParts.size() == 1) {
        return spanOrQueryParts.get(0);
    } else {
        return new SpanOrQuery(spanOrQueryParts.toArray(new SpanQuery[] {}));
    }
}

/**
 * Chains {@code addition} after {@code current}: returns {@code addition} unchanged when there is no
 * query yet, otherwise a {@link SpanNearQuery} over both with slop {@code (gap - 1) + internalSlop},
 * in-order only while {@code internalSlop < 2}.
 */
private SpanQuery appendSpanWithGap(SpanQuery current, SpanQuery addition, int gap) {
    if (current == null) {
        return addition;
    }
    return new SpanNearQuery(new SpanQuery[] { current, addition }, (gap - 1) + internalSlop, internalSlop < 2);
}

/**
 * Reduces a non-empty list of same-position alternatives to one query: the element itself when there
 * is exactly one, otherwise a {@link SpanOrQuery} over all of them.
 */
private SpanQuery collapseSamePositionParts(List<SpanQuery> parts) {
    if (parts.size() == 1) {
        return parts.get(0);
    }
    return new SpanOrQuery(parts.toArray(new SpanQuery[] {}));
}
Also used : SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TopTermsSpanBooleanQueryRewrite(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 14 with PackedTokenAttributeImpl

use of org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl in project SearchServices by Alfresco.

the class Solr4QueryParser method getFirstTokenForRange.

/**
 * Tokenises {@code string} with the analyzer for {@code field.getField()} and returns the string form
 * of the first token produced.
 *
 * @param string the text to analyse
 * @param field supplies the field name handed to the analyzer
 * @return the first token's text, or {@code null} when the analyzer yields no tokens
 * @throws IOException if resetting or advancing the token stream fails (a failure while closing the
 *             stream is ignored)
 */
private String getFirstTokenForRange(String string, FieldInstance field) throws IOException {
    TokenStream stream = null;
    try {
        stream = getAnalyzer().tokenStream(field.getField(), new StringReader(string));
        stream.reset();
        if (!stream.incrementToken()) {
            return null;
        }
        CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttribute = stream.getAttribute(OffsetAttribute.class);
        TypeAttribute typeAttribute = stream.hasAttribute(TypeAttribute.class)
                ? stream.getAttribute(TypeAttribute.class)
                : null;
        PositionIncrementAttribute incrementAttribute = stream.hasAttribute(PositionIncrementAttribute.class)
                ? stream.getAttribute(PositionIncrementAttribute.class)
                : null;
        // Copy the stream's current attribute values into a standalone token.
        PackedTokenAttributeImpl firstToken = new PackedTokenAttributeImpl();
        firstToken.setEmpty().copyBuffer(termAttribute.buffer(), 0, termAttribute.length());
        firstToken.setOffset(offsetAttribute.startOffset(), offsetAttribute.endOffset());
        if (typeAttribute != null) {
            firstToken.setType(typeAttribute.type());
        }
        if (incrementAttribute != null) {
            firstToken.setPositionIncrement(incrementAttribute.getPositionIncrement());
        }
        return firstToken.toString();
    } finally {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // ignore
            }
        }
    }
}
Also used : PackedTokenAttributeImpl(org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) TypeAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) IOException(java.io.IOException) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Aggregations

PackedTokenAttributeImpl (org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl): 14; CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 5; OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 5; PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 5; TypeAttribute (org.apache.lucene.analysis.tokenattributes.TypeAttribute): 5; IOException (java.io.IOException): 4; StringReader (java.io.StringReader): 4; ArrayList (java.util.ArrayList): 4; TokenStream (org.apache.lucene.analysis.TokenStream): 4; SpanQuery (org.apache.lucene.search.spans.SpanQuery): 4; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 4; Term (org.apache.lucene.index.Term): 3; SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 3; SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 3; LinkedList (java.util.LinkedList): 2; SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper): 2; TopTermsSpanBooleanQueryRewrite (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite): 2; DecimalFormat (java.text.DecimalFormat): 1; NumberFormat (java.text.NumberFormat): 1; Iterator (java.util.Iterator): 1