Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.
From the class TestNumericTokenStream, method testLongStream:
public void testLongStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setLongValue(lvalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift = 0; shift < 64; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), LegacyNumericUtils.prefixCodedToLong(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
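The expected term values in this test come from one piece of bit arithmetic: at each precision step, the lowest shift bits of the value are zeroed by the mask ~((1L << shift) - 1L). A standalone sketch of that trimming (the sample value is arbitrary, and the step of 16 is an assumption standing in for LegacyNumericUtils.PRECISION_STEP_DEFAULT; neither is taken from the test):

public class TrimDemo {
  public static void main(String[] args) {
    long lvalue = 0x0123456789ABCDEFL; // arbitrary sample value
    for (int shift = 0; shift < 64; shift += 16) { // assume a precision step of 16
      // zero the lowest 'shift' bits, mirroring the test's expected encoding
      long trimmed = lvalue & ~((1L << shift) - 1L);
      System.out.printf("shift=%2d trimmed=0x%016X%n", shift, trimmed);
    }
  }
}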
Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.
From the class TestNumericTokenStream, method testIntStream:
public void testIntStream() throws Exception {
  @SuppressWarnings("resource")
  final LegacyNumericTokenStream stream = new LegacyNumericTokenStream().setIntValue(ivalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final LegacyNumericTokenStream.LegacyNumericTermAttribute numericAtt = stream.getAttribute(LegacyNumericTokenStream.LegacyNumericTermAttribute.class);
  assertNotNull(numericAtt);
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift = 0; shift < 32; shift += LegacyNumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), LegacyNumericUtils.prefixCodedToInt(bytesAtt.getBytesRef()));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? LegacyNumericTokenStream.TOKEN_TYPE_FULL_PREC : LegacyNumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
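The same consumption pattern works against any TokenStream, not just the numeric test streams. The sketch below (a hypothetical helper, not part of the test class) prints the TypeAttribute of every token while honoring the reset/incrementToken/end/close contract:

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public final class TypeDumper {
  // Hypothetical helper: print the type of each token in a stream.
  public static void dumpTypes(TokenStream stream) throws IOException {
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    stream.reset();                       // required before the first incrementToken()
    while (stream.incrementToken()) {
      System.out.println(typeAtt.type()); // for the streams above, one of the TOKEN_TYPE_* constants
    }
    stream.end();                         // required after the last incrementToken()
    stream.close();
  }
}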
Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.
From the class JsonPreAnalyzedParser, method parse:
@SuppressWarnings("unchecked")
@Override
public ParseResult parse(Reader reader, AttributeSource parent) throws IOException {
  ParseResult res = new ParseResult();
  StringBuilder sb = new StringBuilder();
  char[] buf = new char[128];
  int cnt;
  while ((cnt = reader.read(buf)) > 0) {
    sb.append(buf, 0, cnt);
  }
  String val = sb.toString();
  // empty string - accept even without version number
  if (val.length() == 0) {
    return res;
  }
  Object o = ObjectBuilder.fromJSON(val);
  if (!(o instanceof Map)) {
    throw new IOException("Invalid JSON type " + o.getClass().getName() + ", expected Map");
  }
  Map<String, Object> map = (Map<String, Object>) o;
  // check version
  String version = (String) map.get(VERSION_KEY);
  if (version == null) {
    throw new IOException("Missing VERSION key");
  }
  if (!VERSION.equals(version)) {
    throw new IOException("Unknown VERSION '" + version + "', expected " + VERSION);
  }
  if (map.containsKey(STRING_KEY) && map.containsKey(BINARY_KEY)) {
    throw new IOException("Field cannot have both stringValue and binaryValue");
  }
  res.str = (String) map.get(STRING_KEY);
  String bin = (String) map.get(BINARY_KEY);
  if (bin != null) {
    byte[] data = Base64.base64ToByteArray(bin);
    res.bin = data;
  }
  List<Object> tokens = (List<Object>) map.get(TOKENS_KEY);
  if (tokens == null) {
    return res;
  }
  int tokenStart = 0;
  int tokenEnd = 0;
  parent.clearAttributes();
  for (Object ot : tokens) {
    // automatic increment by 1 separator
    tokenStart = tokenEnd + 1;
    Map<String, Object> tok = (Map<String, Object>) ot;
    boolean hasOffsetStart = false;
    boolean hasOffsetEnd = false;
    int len = -1;
    for (Entry<String, Object> e : tok.entrySet()) {
      String key = e.getKey();
      if (key.equals(TOKEN_KEY)) {
        CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
        String str = String.valueOf(e.getValue());
        catt.append(str);
        len = str.length();
      } else if (key.equals(OFFSET_START_KEY)) {
        Object obj = e.getValue();
        hasOffsetStart = true;
        if (obj instanceof Number) {
          tokenStart = ((Number) obj).intValue();
        } else {
          try {
            tokenStart = Integer.parseInt(String.valueOf(obj));
          } catch (NumberFormatException nfe) {
            LOG.warn("Invalid " + OFFSET_START_KEY + " attribute, skipped: '" + obj + "'");
            hasOffsetStart = false;
          }
        }
      } else if (key.equals(OFFSET_END_KEY)) {
        hasOffsetEnd = true;
        Object obj = e.getValue();
        if (obj instanceof Number) {
          tokenEnd = ((Number) obj).intValue();
        } else {
          try {
            tokenEnd = Integer.parseInt(String.valueOf(obj));
          } catch (NumberFormatException nfe) {
            LOG.warn("Invalid " + OFFSET_END_KEY + " attribute, skipped: '" + obj + "'");
            hasOffsetEnd = false;
          }
        }
      } else if (key.equals(POSINCR_KEY)) {
        Object obj = e.getValue();
        int posIncr = 1;
        if (obj instanceof Number) {
          posIncr = ((Number) obj).intValue();
        } else {
          try {
            posIncr = Integer.parseInt(String.valueOf(obj));
          } catch (NumberFormatException nfe) {
            LOG.warn("Invalid " + POSINCR_KEY + " attribute, skipped: '" + obj + "'");
          }
        }
        PositionIncrementAttribute patt = parent.addAttribute(PositionIncrementAttribute.class);
        patt.setPositionIncrement(posIncr);
      } else if (key.equals(PAYLOAD_KEY)) {
        String str = String.valueOf(e.getValue());
        if (str.length() > 0) {
          byte[] data = Base64.base64ToByteArray(str);
          PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
          if (data != null && data.length > 0) {
            p.setPayload(new BytesRef(data));
          }
        }
      } else if (key.equals(FLAGS_KEY)) {
        try {
          int f = Integer.parseInt(String.valueOf(e.getValue()), 16);
          FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
          flags.setFlags(f);
        } catch (NumberFormatException nfe) {
          LOG.warn("Invalid " + FLAGS_KEY + " attribute, skipped: '" + e.getValue() + "'");
        }
      } else if (key.equals(TYPE_KEY)) {
        TypeAttribute tattr = parent.addAttribute(TypeAttribute.class);
        tattr.setType(String.valueOf(e.getValue()));
      } else {
        LOG.warn("Unknown attribute, skipped: " + e.getKey() + "=" + e.getValue());
      }
    }
    // handle offset attr
    OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
    if (!hasOffsetEnd && len > -1) {
      tokenEnd = tokenStart + len;
    }
    offset.setOffset(tokenStart, tokenEnd);
    if (!hasOffsetStart) {
      tokenStart = tokenEnd + 1;
    }
    // capture state and add to result
    State state = parent.captureState();
    res.states.add(state.clone());
    // reset for reuse
    parent.clearAttributes();
  }
  return res;
}
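For reference, a minimal sketch of driving parse() directly. The single-letter keys ("v", "str", "t", "s", "e", "i", "y") are assumed to be the values of VERSION_KEY, STRING_KEY, TOKEN_KEY and friends, per Solr's documented PreAnalyzed JSON format; the text and offsets are made up:

// Hypothetical test-style snippet; assumes the same imports as the class above.
String json = "{\"v\":\"1\",\"str\":\"jumping jacks\","
    + "\"tokens\":[{\"t\":\"jumping\",\"s\":0,\"e\":7,\"i\":1,\"y\":\"word\"},"
    + "{\"t\":\"jacks\",\"s\":8,\"e\":13,\"i\":1,\"y\":\"word\"}]}";
ParseResult res = new JsonPreAnalyzedParser().parse(new StringReader(json), new AttributeSource());
// res.str holds the stored value; res.states holds one captured state per token

Note the fallback in the method above: when a token carries no explicit end offset, tokenEnd is derived as tokenStart + len, so the "s"/"e" pairs in the JSON are optional.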
Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.
From the class SpellingQueryConverter, method analyze:
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset());
    // overwriting any flags already set...
    token.setFlags(flagsAttValue);
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
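Since analyze(...) is protected, callers normally reach it through the converter's public convert(...) entry point. A sketch of exercising it directly via a subclass (the class name is hypothetical, and wiring in an analyzer beforehand, e.g. via the converter's setAnalyzer(...) as SpellCheckComponent does, is assumed):

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.lucene.analysis.Token;
import org.apache.solr.spelling.SpellingQueryConverter;

// Hypothetical subclass used only to expose the protected method.
public class ExposedConverter extends SpellingQueryConverter {
  public Collection<Token> tokens(String text) throws IOException {
    Collection<Token> out = new ArrayList<>();
    analyze(out, text, 0, 0); // no base offset, no extra flags
    return out;
  }
}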
Use of org.apache.lucene.analysis.tokenattributes.TypeAttribute in project lucene-solr by apache.
From the class SpellCheckComponent, method getTokens:
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
  Collection<Token> result = new ArrayList<>();
  assert analyzer != null;
  try (TokenStream ts = analyzer.tokenStream("", q)) {
    ts.reset();
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
      token.setType(typeAtt.type());
      token.setFlags(flagsAtt.getFlags());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    ts.end();
    return result;
  }
}
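A usage sketch; since getTokens is private, this would live inside SpellCheckComponent, and the query text and WhitespaceAnalyzer choice are illustrative:

// Hypothetical debugging helper inside SpellCheckComponent.
private void dumpQueryTokens(String q) throws IOException {
  for (Token t : getTokens(q, new org.apache.lucene.analysis.core.WhitespaceAnalyzer())) {
    System.out.println(t + " [" + t.startOffset() + "," + t.endOffset() + ") type=" + t.type());
  }
}

Note that this method calls reset() before the addAttribute(...) calls, the reverse of the usual order; addAttribute only registers attribute instances on the stream, so the reordering is harmless in practice.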