use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
the class SpanOrTermsBuilder method getSpanQuery.
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
String value = DOMUtils.getNonBlankTextOrFail(e);
List<SpanQuery> clausesList = new ArrayList<>();
try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
while (ts.incrementToken()) {
SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef())));
clausesList.add(stq);
}
ts.end();
SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
return new SpanBoostQuery(soq, boost);
} catch (IOException ioe) {
throw new ParserException("IOException parsing value:" + value);
}
}
use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
the class TermsQueryBuilder method getQuery.
@Override
public Query getQuery(Element e) throws ParserException {
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
String text = DOMUtils.getNonBlankTextOrFail(e);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
Term term = null;
ts.reset();
while (ts.incrementToken()) {
term = new Term(fieldName, BytesRef.deepCopyOf(termAtt.getBytesRef()));
bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
}
ts.end();
} catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
}
Query q = bq.build();
float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
return new BoostQuery(q, boost);
}
use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
the class CursorMarkTest method getRandomCollation.
private static Object getRandomCollation(SchemaField sf) throws IOException {
Object val;
Analyzer analyzer = sf.getType().getIndexAnalyzer();
String term = TestUtil.randomRealisticUnicodeString(random());
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
val = BytesRef.deepCopyOf(termAtt.getBytesRef());
assertFalse(ts.incrementToken());
ts.end();
}
return val;
}
use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
the class CollationTestBase method assertThreadSafe.
public void assertThreadSafe(final Analyzer analyzer) throws Exception {
int numTestPoints = 100;
int numThreads = TestUtil.nextInt(random(), 3, 5);
final HashMap<String, BytesRef> map = new HashMap<>();
for (int i = 0; i < numTestPoints; i++) {
String term = TestUtil.randomSimpleString(random());
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
// ensure we make a copy of the actual bytes too
map.put(term, BytesRef.deepCopyOf(termAtt.getBytesRef()));
assertFalse(ts.incrementToken());
ts.end();
}
}
Thread[] threads = new Thread[numThreads];
for (int i = 0; i < numThreads; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
for (Map.Entry<String, BytesRef> mapping : map.entrySet()) {
String term = mapping.getKey();
BytesRef expected = mapping.getValue();
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
assertEquals(expected, termAtt.getBytesRef());
assertFalse(ts.incrementToken());
ts.end();
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
for (int i = 0; i < numThreads; i++) {
threads[i].start();
}
for (int i = 0; i < numThreads; i++) {
threads[i].join();
}
}
use of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute in project lucene-solr by apache.
the class JsonPreAnalyzedParser method toFormattedString.
@Override
public String toFormattedString(Field f) throws IOException {
Map<String, Object> map = new LinkedHashMap<>();
map.put(VERSION_KEY, VERSION);
if (f.fieldType().stored()) {
String stringValue = f.stringValue();
if (stringValue != null) {
map.put(STRING_KEY, stringValue);
}
BytesRef binaryValue = f.binaryValue();
if (binaryValue != null) {
map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
}
}
TokenStream ts = f.tokenStreamValue();
if (ts != null) {
List<Map<String, Object>> tokens = new LinkedList<>();
while (ts.incrementToken()) {
Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
String cTerm = null;
String tTerm = null;
Map<String, Object> tok = new TreeMap<>();
while (it.hasNext()) {
Class<? extends Attribute> cl = it.next();
Attribute att = ts.getAttribute(cl);
if (att == null) {
continue;
}
if (cl.isAssignableFrom(CharTermAttribute.class)) {
CharTermAttribute catt = (CharTermAttribute) att;
cTerm = new String(catt.buffer(), 0, catt.length());
} else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
tTerm = tatt.getBytesRef().utf8ToString();
} else {
if (cl.isAssignableFrom(FlagsAttribute.class)) {
tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
} else if (cl.isAssignableFrom(OffsetAttribute.class)) {
tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
} else if (cl.isAssignableFrom(PayloadAttribute.class)) {
BytesRef p = ((PayloadAttribute) att).getPayload();
if (p != null && p.length > 0) {
tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
}
} else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
} else if (cl.isAssignableFrom(TypeAttribute.class)) {
tok.put(TYPE_KEY, ((TypeAttribute) att).type());
} else {
tok.put(cl.getName(), att.toString());
}
}
}
String term = null;
if (cTerm != null) {
term = cTerm;
} else {
term = tTerm;
}
if (term != null && term.length() > 0) {
tok.put(TOKEN_KEY, term);
}
tokens.add(tok);
}
map.put(TOKENS_KEY, tokens);
}
return JSONUtil.toJSON(map, -1);
}
Aggregations