Example usage of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the SearchServices project by Alfresco: class PathTokenFilterTest, method tokenise.
/**
 * Consumes the given token stream and checks each emitted token against the
 * expected array, using the token type to validate PathTokenFilter's
 * alternating namespace/name pattern (namespace tokens at even indices,
 * name tokens at odd indices).
 *
 * @param ts     the token stream to consume; closed by this method
 * @param tokens the expected token texts, in emission order
 * @throws IOException if the stream cannot be read
 */
private void tokenise(TokenStream ts, String[] tokens) throws IOException {
    int i = 0;
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    // try-with-resources guarantees close() even when an assertion fails
    try (TokenStream stream = ts) {
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("token: " + stream.reflectAsString(true));
            String termText = termAtt.toString();
            String type = typeAtt.type();
            if (type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE)
                    || type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAMESPACE_PREFIX)) {
                // use assertTrue instead of the bare assert keyword: assert
                // statements are silently skipped unless the JVM runs with -ea
                assertTrue(i % 2 == 0);
                // JUnit convention is assertEquals(expected, actual); the
                // original had the arguments reversed, which produces
                // misleading failure messages
                assertEquals(tokens[i++], termText);
            } else if (type.equals(PathTokenFilter.TOKEN_TYPE_PATH_ELEMENT_NAME)) {
                assertTrue(i % 2 == 1);
                assertEquals(tokens[i++], termText);
            }
        }
        stream.end();
    }
    if (i != tokens.length) {
        fail("Invalid number of tokens, found " + i + " and expected " + tokens.length);
    }
}
Example usage of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the SearchServices project by Alfresco: class PathTokenFilterTest, method testAttributesAfterStreamEnd.
/**
 * Verifies that PathTokenFilter leaves its attributes in the documented
 * end-of-stream state once every token has been consumed.
 */
public void testAttributesAfterStreamEnd() throws IOException {
    final String path = "uri1:one";

    PathTokenFilter filter = new PathTokenFilter(PathTokenFilter.PATH_SEPARATOR,
            PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
            PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);
    filter.setReader(new StringReader(path));

    final CharTermAttribute termAtt = filter.addAttribute(CharTermAttribute.class);
    final TypeAttribute typeAtt = filter.addAttribute(TypeAttribute.class);
    final OffsetAttribute offsetAtt = filter.addAttribute(OffsetAttribute.class);
    final PositionIncrementAttribute posIncAtt = filter.addAttribute(PositionIncrementAttribute.class);

    // Consuming all tokens causes PathTokenFilter.end() to be invoked.
    tokenise(filter, new String[] { "uri1", "one" });

    // After end(): term cleared, type reset to the default, position
    // increment zeroed, and both offsets set to the final offset (the
    // length of the input).
    assertEquals("", termAtt.toString());
    assertEquals("word", typeAtt.type());
    assertEquals(0, posIncAtt.getPositionIncrement());
    assertEquals(path.length(), offsetAtt.startOffset());
    assertEquals(path.length(), offsetAtt.endOffset());
}
Example usage of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the SearchServices project by Alfresco: class Solr4QueryParser, method getToken.
/**
 * Runs the given value through the field's analyzer and returns the string
 * form of the LAST token produced, or {@code null} if the analyzer emitted
 * no tokens.
 *
 * @param field        the field whose analyzer is used
 * @param value        the raw text to analyse
 * @param analysisMode analysis mode requested by the caller
 *                     (currently unused in this method)
 * @return the last token as a string, or {@code null} for no tokens
 * @throws ParseException if tokenisation fails with an I/O error
 */
protected String getToken(String field, String value, AnalysisMode analysisMode) throws ParseException {
    try (TokenStream source = getAnalyzer().tokenStream(field, new StringReader(value))) {
        String tokenised = null;
        // The TokenStream contract requires reset() before the first
        // incrementToken(); without it modern analyzers throw
        // IllegalStateException. getFirstTokenForRange already does this.
        source.reset();
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            PackedTokenAttributeImpl token = new PackedTokenAttributeImpl();
            token.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                token.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                token.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            tokenised = token.toString();
        }
        // Complete the consumer workflow before close().
        source.end();
        return tokenised;
    } catch (IOException e) {
        // ParseException has no (String, Throwable) constructor, so attach
        // the underlying failure via initCause rather than discarding it.
        ParseException pe = new ParseException("IO error while tokenising: " + e.getMessage());
        pe.initCause(e);
        throw pe;
    }
}
Example usage of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the SearchServices project by Alfresco: class Solr4QueryParser, method getFirstTokenForRange.
/**
 * Analyses the given string with the field's analyzer and returns the
 * string form of the FIRST token produced, or {@code null} if the analyzer
 * emitted no tokens.
 *
 * @param string the raw text to analyse
 * @param field  the field instance whose analyzer is used
 * @return the first token as a string, or {@code null} for no tokens
 * @throws IOException if tokenisation fails
 */
private String getFirstTokenForRange(String string, FieldInstance field) throws IOException {
    // try-with-resources replaces the original's manual close-in-finally
    // (which also swallowed close() failures) and removes a stray empty
    // statement that followed the declaration block.
    try (TokenStream source = getAnalyzer().tokenStream(field.getField(), new StringReader(string))) {
        source.reset();
        // Only the first token is needed, so a single conditional replaces
        // the original while-loop that returned on its first iteration.
        if (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            PackedTokenAttributeImpl nextToken = new PackedTokenAttributeImpl();
            nextToken.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            nextToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                nextToken.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                nextToken.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            return nextToken.toString();
        }
        return null;
    }
}
Example usage of org.apache.lucene.analysis.tokenattributes.TypeAttribute in the Jackrabbit project by Apache: class JackrabbitQueryParser, method getPrefixQuery.
/**
* {@inheritDoc}
*/
/**
 * {@inheritDoc}
 *
 * Creates a prefix (wildcard) query only when the term analyses to a single
 * token; a multi-token CJ (Chinese/Japanese) term falls back to a phrase
 * field query instead.
 */
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    // only create a prefix query when the term is a single word / token
    Analyzer a = getAnalyzer();
    TokenStream ts = a.tokenStream(field, new StringReader(termStr));
    int count = 0;
    boolean isCJ = false;
    try {
        TypeAttribute t = ts.addAttribute(TypeAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            count++;
            // Deliberately records the type of the LAST token only; combined
            // with count > 1 below this detects multi-token CJ terms.
            isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
        }
        ts.end();
    } catch (IOException e) {
        // ParseException has no cause-taking constructor; attach the
        // underlying I/O failure via initCause instead of discarding it.
        ParseException pe = new ParseException(e.getMessage());
        pe.initCause(e);
        throw pe;
    } finally {
        try {
            ts.close();
        } catch (IOException ignored) {
            // best-effort close: the query result is already determined
        }
    }
    if (count > 1 && isCJ) {
        return getFieldQuery(field, termStr);
    } else {
        return getWildcardQuery(field, termStr + "*");
    }
}
Aggregations