use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.
the class SimilarityJoinPredicate method mergeTuples.
/**
 * Builds the joined output tuple for one inner/outer tuple pair.
 *
 * The fields are produced in the order of {@code outputSchema.getAttributeNames()}:
 * <ul>
 * <li>{@code SchemaConstants._ID}: a freshly generated random UUID;</li>
 * <li>{@code SchemaConstants.SPAN_LIST}: the supplied {@code mergeSpanList};</li>
 * <li>{@code SchemaConstants.PAYLOAD}: the inner payload spans followed by the outer
 * payload spans, each passed through {@code addFieldPrefix} with the matching prefix;</li>
 * <li>names starting with {@code INNER_PREFIX} / {@code OUTER_PREFIX}: the field of the
 * inner / outer tuple whose name is the attribute name with the prefix removed.</li>
 * </ul>
 * Attribute names that match none of the cases above contribute no field.
 *
 * @param innerTuple    tuple from the inner side of the join
 * @param outerTuple    tuple from the outer side of the join
 * @param outputSchema  schema of the tuple to build
 * @param mergeSpanList span list to store in the SPAN_LIST field
 * @return the merged tuple
 */
private Tuple mergeTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema, List<Span> mergeSpanList) {
    List<IField> resultFields = new ArrayList<>();
    for (String attrName : outputSchema.getAttributeNames()) {
        if (attrName.equals(SchemaConstants._ID)) {
            // generate a new _ID field for this tuple
            resultFields.add(new IDField(UUID.randomUUID().toString()));
        } else if (attrName.equals(SchemaConstants.SPAN_LIST)) {
            // use the generated spanList
            resultFields.add(new ListField<Span>(mergeSpanList));
        } else if (attrName.equals(SchemaConstants.PAYLOAD)) {
            // put the payload of two tuples together
            ListField<Span> innerPayloadField = innerTuple.getField(SchemaConstants.PAYLOAD);
            ListField<Span> outerPayloadField = outerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> resultPayload = new ArrayList<>();
            for (Span span : innerPayloadField.getValue()) {
                resultPayload.add(addFieldPrefix(span, INNER_PREFIX));
            }
            for (Span span : outerPayloadField.getValue()) {
                // use the shared constant so payload prefixes agree with the attribute prefixes below
                resultPayload.add(addFieldPrefix(span, OUTER_PREFIX));
            }
            // the merged payload must actually be emitted; otherwise the tuple has
            // fewer fields than the schema has attributes
            resultFields.add(new ListField<Span>(resultPayload));
        } else if (attrName.startsWith(INNER_PREFIX)) {
            // add other fields from the inner tuple
            resultFields.add(innerTuple.getField(attrName.substring(INNER_PREFIX.length())));
        } else if (attrName.startsWith(OUTER_PREFIX)) {
            // add other fields from the outer tuple
            resultFields.add(outerTuple.getField(attrName.substring(OUTER_PREFIX.length())));
        }
    }
    return new Tuple(outputSchema, resultFields.stream().toArray(IField[]::new));
}
use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.
the class JoinDistancePredicate method joinTuples.
/**
 * This method is called by the Join operator to perform the join on the
 * tuples passed.
 *
 * Two spans (one from each tuple, both on the join attribute) are joined when
 * both their start offsets and their end offsets differ by at most the
 * threshold. The joined span covers the union of the two spans, and its value
 * is the matching substring of the inner tuple's join attribute.
 *
 * @param innerTuple   tuple from the inner operator
 * @param outerTuple   tuple from the outer operator
 * @param outputSchema schema of the tuple to produce
 * @return New Tuple containing the result of join operation, or null when the
 *         _ID or join-attribute comparison fails, when either tuple has no
 *         usable span list, or when no pair of spans satisfies the threshold.
 */
@Override
public Tuple joinTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema) throws Exception {
    /*
     * We expect the values of all fields to be the same for innerTuple and outerTuple.
     * We only check the _ID field and the field to be joined, since they are crucial
     * to the join operator. For other fields, we use the value from innerTuple.
     */
    // check if the _ID fields are the same
    if (!compareField(innerTuple, outerTuple, SchemaConstants._ID)) {
        return null;
    }
    // check if the fields to be joined are the same
    if (!compareField(innerTuple, outerTuple, this.joinAttributeName)) {
        return null;
    }

    /*
     * If either/both tuples have no span information, return null
     * so the caller can move on to the next tuple.
     */
    ListField<Span> spanFieldOfInnerTuple = innerTuple.getField(SchemaConstants.SPAN_LIST);
    ListField<Span> spanFieldOfOuterTuple = outerTuple.getField(SchemaConstants.SPAN_LIST);
    List<Span> innerSpanList = null;
    List<Span> outerSpanList = null;
    if (spanFieldOfInnerTuple != null && spanFieldOfInnerTuple.getClass().equals(ListField.class)) {
        innerSpanList = spanFieldOfInnerTuple.getValue();
    }
    if (spanFieldOfOuterTuple != null && spanFieldOfOuterTuple.getClass().equals(ListField.class)) {
        outerSpanList = spanFieldOfOuterTuple.getValue();
    }
    // guard against missing span lists instead of failing with a NullPointerException below
    if (innerSpanList == null || outerSpanList == null) {
        return null;
    }

    // kept boxed so a missing threshold only surfaces when a comparison is actually made
    Integer threshold = this.getThreshold();
    List<Span> newJoinSpanList = new ArrayList<>();
    for (Span outerSpan : outerSpanList) {
        // only spans on the join attribute take part in the join
        if (!outerSpan.getAttributeName().equals(this.joinAttributeName)) {
            continue;
        }
        for (Span innerSpan : innerSpanList) {
            if (!innerSpan.getAttributeName().equals(this.joinAttributeName)) {
                continue;
            }
            boolean startsClose = Math.abs(outerSpan.getStart() - innerSpan.getStart()) <= threshold;
            boolean endsClose = Math.abs(outerSpan.getEnd() - innerSpan.getEnd()) <= threshold;
            if (startsClose && endsClose) {
                // the joined span is the smallest range covering both spans
                int newSpanStartIndex = Math.min(innerSpan.getStart(), outerSpan.getStart());
                int newSpanEndIndex = Math.max(innerSpan.getEnd(), outerSpan.getEnd());
                String attributeName = this.joinAttributeName;
                String fieldValue = (String) innerTuple.getField(attributeName).getValue();
                String newFieldValue = fieldValue.substring(newSpanStartIndex, newSpanEndIndex);
                String spanKey = outerSpan.getKey() + "_" + innerSpan.getKey();
                newJoinSpanList.add(new Span(attributeName, newSpanStartIndex, newSpanEndIndex, spanKey, newFieldValue));
            }
        }
    }
    if (newJoinSpanList.isEmpty()) {
        return null;
    }

    // create output fields based on innerTuple's value; the span list is appended last
    List<Attribute> outputAttrList = outputSchema.getAttributes();
    // collect into an explicit ArrayList: Collectors.toList() does not guarantee a mutable list
    List<IField> outputFields = outputAttrList.stream()
            .filter(attr -> !attr.equals(SchemaConstants.SPAN_LIST_ATTRIBUTE))
            .map(attr -> attr.getAttributeName())
            .map(attributeName -> innerTuple.getField(attributeName, IField.class))
            .collect(Collectors.toCollection(ArrayList::new));
    outputFields.add(new ListField<>(newJoinSpanList));
    return new Tuple(outputSchema, outputFields.stream().toArray(IField[]::new));
}
use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.
the class NlpEntityOperator method processOneInputTuple.
/**
 * Runs NLP span extraction over every attribute named by the predicate and
 * appends the extracted spans to the input tuple's span list.
 *
 * @param inputTuple tuple to process; its span list is modified in place
 * @return the same {@code inputTuple} when at least one span was extracted,
 *         otherwise null
 */
@Override
public Tuple processOneInputTuple(Tuple inputTuple) throws TextDBException {
    List<Span> extractedSpans = new ArrayList<>();
    for (String name : predicate.getAttributeNames()) {
        IField attributeField = inputTuple.getField(name);
        List<Span> spansForAttribute = extractNlpSpans(attributeField, name);
        extractedSpans.addAll(spansForAttribute);
    }

    if (!extractedSpans.isEmpty()) {
        // append to the list already stored in the tuple's span-list field
        ListField<Span> targetField = inputTuple.getField(predicate.getSpanListName());
        targetField.getValue().addAll(extractedSpans);
        return inputTuple;
    }
    return null;
}
use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.
the class RegexSplitOperator method populateOutputBuffer.
/**
 * Splits the configured attribute of {@code inputTuple} with {@code splitText}
 * and refills {@code outputTupleBuffer} with one tuple per resulting piece.
 *
 * Each output tuple starts with a newly generated random ID, followed by the
 * input schema's attributes in order (the old _ID is skipped); the split
 * attribute holds the piece, every other attribute keeps the input's field.
 * Nothing happens when the tuple is null or the attribute to split is
 * neither TEXT nor STRING.
 *
 * @param inputTuple tuple whose attribute is split; may be null
 */
private void populateOutputBuffer(Tuple inputTuple) throws TextDBException {
    if (inputTuple == null) {
        return;
    }

    final String splitAttribute = predicate.getAttributeToSplit();
    AttributeType attributeType = this.inputSchema.getAttribute(splitAttribute).getAttributeType();
    final boolean isText = attributeType == AttributeType.TEXT;
    if (!isText && attributeType != AttributeType.STRING) {
        return;
    }

    String strToSplit = inputTuple.getField(splitAttribute).getValue().toString();
    List<String> pieces = splitText(strToSplit);

    outputTupleBuffer = new ArrayList<>();
    for (String piece : pieces) {
        List<IField> fields = new ArrayList<>();
        // Every output tuple gets its own new UUID in place of the old ID.
        fields.add(IDField.newRandomID());
        for (String attributeName : inputSchema.getAttributeNames()) {
            if (attributeName.equals(SchemaConstants._ID)) {
                continue;
            }
            if (!attributeName.equals(splitAttribute)) {
                fields.add(inputTuple.getField(attributeName));
                continue;
            }
            // keep the field type of the attribute being split
            IField pieceField = isText ? new TextField(piece) : new StringField(piece);
            fields.add(pieceField);
        }
        Tuple outputTuple = new Tuple(outputSchema, fields.stream().toArray(IField[]::new));
        outputTupleBuffer.add(outputTuple);
    }
}
use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.
the class NlpSentimentOperator method getNextTuple.
/**
 * Fetches the next tuple from the input operator and returns a copy of it
 * with one extra integer field holding the result of
 * {@code computeSentimentScore}.
 *
 * @return the extended tuple, or null when this operator is closed or the
 *         input operator has no more tuples
 */
@Override
public Tuple getNextTuple() throws TextDBException {
    Tuple inputTuple = (cursor == CLOSED) ? null : inputOperator.getNextTuple();
    if (inputTuple == null) {
        return null;
    }

    // copy the input fields, then append the sentiment score as the last field
    List<IField> fieldsWithSentiment = new ArrayList<>(inputTuple.getFields());
    IntegerField sentimentField = new IntegerField(computeSentimentScore(inputTuple));
    fieldsWithSentiment.add(sentimentField);
    return new Tuple(outputSchema, fieldsWithSentiment);
}
Aggregations