Use of edu.uci.ics.textdb.api.schema.AttributeType in project textdb by TextDB.
The class DataWriter, method getLuceneDocument.
/*
 * Builds a Lucene document from a TextDB tuple.
 *
 * The tuple's fields are paired by position with the attributes of the
 * tuple's schema. Each (attribute type, attribute name, field value) triple
 * is converted by StorageUtils.getLuceneField and added to the document.
 */
private static Document getLuceneDocument(Tuple tuple) {
    List<IField> tupleFields = tuple.getFields();
    List<Attribute> schemaAttributes = tuple.getSchema().getAttributes();
    Document luceneDocument = new Document();

    int position = 0;
    while (position < tupleFields.size()) {
        IField tupleField = tupleFields.get(position);
        Attribute attribute = schemaAttributes.get(position);
        luceneDocument.add(StorageUtils.getLuceneField(
                attribute.getAttributeType(),
                attribute.getAttributeName(),
                tupleField.getValue()));
        position++;
    }
    return luceneDocument;
}
Use of edu.uci.ics.textdb.api.schema.AttributeType in project textdb by TextDB.
The class DictionaryMatcherSourceOperator, method computeMatchingResult.
/*
 * Matches one dictionary key against the given tuple.
 *
 * For every attribute name listed by the predicate:
 *  - if the attribute type is not TEXT, the key matches only when it equals
 *    the whole field value (case-sensitive); one span covering the entire
 *    value is recorded;
 *  - if the attribute type is TEXT, every case-insensitive occurrence of the
 *    key inside the field value is recorded as a span. The key is treated as
 *    literal text, not as a regular expression.
 *
 * The dictionary cursor is advanced once per call, after matching, whether
 * or not anything matched.
 *
 * Returns null when the key matches nothing. Otherwise the new spans are
 * appended to the span list already stored in sourceTuple (the field named
 * by predicate.getSpanListName()) and that same, mutated tuple is returned.
 */
private Tuple computeMatchingResult(String key, Tuple sourceTuple) throws TextDBException {
    List<Span> matchingResults = new ArrayList<>();
    // Compiled lazily, at most once per call: it depends only on the key,
    // and is needed only if at least one attribute is of type TEXT.
    Pattern keyPattern = null;

    for (String attributeName : predicate.getAttributeNames()) {
        String fieldValue = sourceTuple.getField(attributeName).getValue().toString();
        AttributeType attributeType = inputSchema.getAttribute(attributeName).getAttributeType();

        if (attributeType != AttributeType.TEXT) {
            // Non-TEXT attributes must match the key exactly.
            if (fieldValue.equals(key)) {
                matchingResults.add(new Span(attributeName, 0, fieldValue.length(), key, fieldValue));
            }
            continue;
        }

        // TEXT attribute: the key may match any substring of fieldValue.
        if (keyPattern == null) {
            // LITERAL: dictionary entries are plain text, so metacharacters
            // such as '+', '.', '(' must not be interpreted as regex syntax.
            // CASE_INSENSITIVE | UNICODE_CASE: case-insensitive matching
            // without lower-casing the inputs first.
            keyPattern = Pattern.compile(key,
                    Pattern.LITERAL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        }
        // Match against the original value (not a lower-cased copy) so that
        // the offsets are valid indices into fieldValue; lower-casing can
        // change the length of a string.
        Matcher matcher = keyPattern.matcher(fieldValue);
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            matchingResults.add(new Span(attributeName, start, end, key, fieldValue.substring(start, end)));
        }
    }

    advanceDictionaryCursor();
    if (matchingResults.isEmpty()) {
        return null;
    }

    // Append to the span list that the tuple already carries.
    ListField<Span> spanListField = sourceTuple.getField(predicate.getSpanListName());
    List<Span> spanList = spanListField.getValue();
    spanList.addAll(matchingResults);
    return sourceTuple;
}
Aggregations