Search in sources :

Example 91 with IField

use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.

the class SimilarityJoinPredicate method mergeTuples.

/**
 * Builds one output tuple from a matched inner/outer tuple pair.
 *
 * Fields are produced in the order of the attribute names of {@code outputSchema}:
 * <ul>
 *   <li>{@code _ID}: a freshly generated random UUID;</li>
 *   <li>span list: the given {@code mergeSpanList};</li>
 *   <li>payload: the payload spans of both tuples, each passed through
 *       {@code addFieldPrefix} with the inner/outer prefix;</li>
 *   <li>any attribute starting with {@code INNER_PREFIX} / {@code OUTER_PREFIX}:
 *       the field of the inner / outer tuple whose name is the attribute name
 *       with that prefix stripped.</li>
 * </ul>
 *
 * @param innerTuple    tuple from the inner side of the join
 * @param outerTuple    tuple from the outer side of the join
 * @param outputSchema  schema that determines the order of the result fields
 * @param mergeSpanList span list to store in the result's span-list field
 * @return the merged tuple
 */
private Tuple mergeTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema, List<Span> mergeSpanList) {
    List<IField> resultFields = new ArrayList<>();
    for (String attrName : outputSchema.getAttributeNames()) {
        if (attrName.equals(SchemaConstants._ID)) {
            // generate a new _ID field for this tuple
            IDField newID = new IDField(UUID.randomUUID().toString());
            resultFields.add(newID);
        } else if (attrName.equals(SchemaConstants.SPAN_LIST)) {
            // use the generated spanList
            resultFields.add(new ListField<Span>(mergeSpanList));
        } else if (attrName.equals(SchemaConstants.PAYLOAD)) {
            // put the payload of two tuples together
            ListField<Span> innerPayloadField = innerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> innerPayload = innerPayloadField.getValue();
            ListField<Span> outerPayloadField = outerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> outerPayload = outerPayloadField.getValue();
            List<Span> resultPayload = new ArrayList<>();
            resultPayload.addAll(innerPayload.stream().map(span -> addFieldPrefix(span, INNER_PREFIX)).collect(Collectors.toList()));
            // Use the OUTER_PREFIX constant (not a literal) so the span prefix always
            // agrees with the prefix used for the outer attribute names below.
            resultPayload.addAll(outerPayload.stream().map(span -> addFieldPrefix(span, OUTER_PREFIX)).collect(Collectors.toList()));
            // The merged payload must be emitted; otherwise the field list is one
            // entry short of the schema.
            resultFields.add(new ListField<Span>(resultPayload));
        } else if (attrName.startsWith(INNER_PREFIX)) {
            // add other fields from the inner tuple
            resultFields.add(innerTuple.getField(attrName.substring(INNER_PREFIX.length())));
        } else if (attrName.startsWith(OUTER_PREFIX)) {
            // add other fields from the outer tuple
            resultFields.add(outerTuple.getField(attrName.substring(OUTER_PREFIX.length())));
        }
        // NOTE(review): an attribute matching none of the cases above contributes no
        // field; presumably the output schema never contains one — confirm with the
        // code that generates the output schema.
    }
    return new Tuple(outputSchema, resultFields.stream().toArray(IField[]::new));
}
Also used : IDField(edu.uci.ics.textdb.api.field.IDField) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span)

Example 92 with IField

use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.

the class JoinDistancePredicate method joinTuples.

/**
 * This method is called by the Join operator to perform the join on the
 * tuples passed.
 *
 * Two spans (one from each tuple, both on the join attribute) are joined when
 * both their start offsets and their end offsets differ by at most the
 * threshold. Each joined pair yields a new span that covers both spans.
 *
 * @param innerTuple   tuple from the inner operator
 * @param outerTuple   tuple from the outer operator
 * @param outputSchema schema of the tuple to produce
 * @return New Tuple containing the result of join operation, or {@code null}
 *         if the tuples differ in the _ID or join field, if either tuple has
 *         no span list, or if no pair of spans satisfies the threshold.
 */
@Override
public Tuple joinTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema) throws Exception {
    /*
     * We expect the values of all fields to be the same for innerTuple and outerTuple.
     * We only check the _ID field and the field to be joined, since they are crucial
     * to the join operator. For other fields, we use the value from innerTuple.
     */
    if (!compareField(innerTuple, outerTuple, SchemaConstants._ID)) {
        return null;
    }
    if (!compareField(innerTuple, outerTuple, this.joinAttributeName)) {
        return null;
    }

    /*
     * If either/both tuples have no span information, return null
     * so the caller can process the next tuple.
     */
    ListField<Span> spanFieldOfInnerTuple = innerTuple.getField(SchemaConstants.SPAN_LIST);
    ListField<Span> spanFieldOfOuterTuple = outerTuple.getField(SchemaConstants.SPAN_LIST);
    List<Span> innerSpanList = null;
    List<Span> outerSpanList = null;
    if (spanFieldOfInnerTuple != null && spanFieldOfInnerTuple.getClass().equals(ListField.class)) {
        innerSpanList = spanFieldOfInnerTuple.getValue();
    }
    if (spanFieldOfOuterTuple != null && spanFieldOfOuterTuple.getClass().equals(ListField.class)) {
        outerSpanList = spanFieldOfOuterTuple.getValue();
    }
    // Without this guard a missing span list surfaced as a NullPointerException
    // instead of the documented null result.
    if (innerSpanList == null || outerSpanList == null) {
        return null;
    }

    List<Span> newJoinSpanList = new ArrayList<>();
    String attributeName = this.joinAttributeName;
    // The threshold does not change while joining one pair of tuples.
    Integer threshold = this.getThreshold();

    for (Span outerSpan : outerSpanList) {
        // Only spans on the join attribute take part in the join.
        if (!outerSpan.getAttributeName().equals(attributeName)) {
            continue;
        }
        for (Span innerSpan : innerSpanList) {
            if (!innerSpan.getAttributeName().equals(attributeName)) {
                continue;
            }
            boolean startsClose = Math.abs(outerSpan.getStart() - innerSpan.getStart()) <= threshold;
            boolean endsClose = Math.abs(outerSpan.getEnd() - innerSpan.getEnd()) <= threshold;
            if (startsClose && endsClose) {
                // The joined span runs from the smaller start to the larger end.
                Integer newSpanStartIndex = Math.min(innerSpan.getStart(), outerSpan.getStart());
                Integer newSpanEndIndex = Math.max(innerSpan.getEnd(), outerSpan.getEnd());
                String fieldValue = (String) innerTuple.getField(attributeName).getValue();
                String newFieldValue = fieldValue.substring(newSpanStartIndex, newSpanEndIndex);
                String spanKey = outerSpan.getKey() + "_" + innerSpan.getKey();
                Span newSpan = new Span(attributeName, newSpanStartIndex, newSpanEndIndex, spanKey, newFieldValue);
                newJoinSpanList.add(newSpan);
            }
        }
    }
    if (newJoinSpanList.isEmpty()) {
        return null;
    }

    // create output fields based on innerTuple's value; the new span list is
    // appended last (presumably the span list is the last attribute of
    // outputSchema — confirm against the schema construction).
    List<Attribute> outputAttrList = outputSchema.getAttributes();
    List<IField> outputFields = outputAttrList.stream()
            .filter(attr -> !attr.equals(SchemaConstants.SPAN_LIST_ATTRIBUTE))
            .map(attr -> attr.getAttributeName())
            .map(name -> innerTuple.getField(name, IField.class))
            .collect(Collectors.toList());
    outputFields.add(new ListField<>(newJoinSpanList));
    return new Tuple(outputSchema, outputFields.stream().toArray(IField[]::new));
}
Also used : SchemaConstants(edu.uci.ics.textdb.api.constants.SchemaConstants) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Iterator(java.util.Iterator) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) PredicateBase(edu.uci.ics.textdb.exp.common.PredicateBase) Schema(edu.uci.ics.textdb.api.schema.Schema) List(java.util.List) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) edu.uci.ics.textdb.api.tuple(edu.uci.ics.textdb.api.tuple) Span(edu.uci.ics.textdb.api.span.Span) PropertyNameConstants(edu.uci.ics.textdb.exp.common.PropertyNameConstants) IOperator(edu.uci.ics.textdb.api.dataflow.IOperator) Attribute(edu.uci.ics.textdb.api.schema.Attribute) ArrayList(java.util.ArrayList) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span)

Example 93 with IField

use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.

the class NlpEntityOperator method processOneInputTuple.

/**
 * Extracts NLP spans from every attribute named by the predicate and appends
 * them to the span list already stored in the input tuple.
 *
 * @param inputTuple tuple to process; its span list is extended in place
 * @return the same {@code inputTuple} when at least one span was extracted,
 *         otherwise {@code null}
 * @throws TextDBException declared by the operator contract
 */
@Override
public Tuple processOneInputTuple(Tuple inputTuple) throws TextDBException {
    List<Span> extractedSpans = new ArrayList<>();
    for (String name : predicate.getAttributeNames()) {
        IField attributeField = inputTuple.getField(name);
        List<Span> spansForAttribute = extractNlpSpans(attributeField, name);
        extractedSpans.addAll(spansForAttribute);
    }

    if (!extractedSpans.isEmpty()) {
        // The tuple's span list is looked up only once there is something to add.
        ListField<Span> existingSpanField = inputTuple.getField(predicate.getSpanListName());
        existingSpanField.getValue().addAll(extractedSpans);
        return inputTuple;
    }
    return null;
}
Also used : ArrayList(java.util.ArrayList) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span)

Example 94 with IField

use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.

the class RegexSplitOperator method populateOutputBuffer.

/**
 * Splits the configured attribute of {@code inputTuple} and stores one output
 * tuple per resulting piece in {@code outputTupleBuffer}.
 *
 * If the regex does not have any match in the tuple, we return the whole string as the result.
 * Nothing is done (and the buffer is left untouched) when the tuple is null
 * or the attribute to split is neither TEXT nor STRING.
 *
 * Every output tuple starts with a newly generated ID, followed by the input
 * schema's other attributes in order; the split attribute carries the piece,
 * all remaining attributes are copied from the input tuple.
 */
private void populateOutputBuffer(Tuple inputTuple) throws TextDBException {
    if (inputTuple == null) {
        return;
    }
    AttributeType splitType = this.inputSchema.getAttribute(predicate.getAttributeToSplit()).getAttributeType();
    boolean splittable = splitType == AttributeType.TEXT || splitType == AttributeType.STRING;
    if (!splittable) {
        return;
    }

    String wholeText = inputTuple.getField(predicate.getAttributeToSplit()).getValue().toString();
    List<String> pieces = splitText(wholeText);
    outputTupleBuffer = new ArrayList<>();

    for (String piece : pieces) {
        List<IField> fields = new ArrayList<>();
        // Each output tuple gets its own fresh ID in place of the input's.
        fields.add(IDField.newRandomID());
        for (String name : inputSchema.getAttributeNames()) {
            if (name.equals(SchemaConstants._ID)) {
                // The old ID was already replaced above.
                continue;
            }
            if (!name.equals(predicate.getAttributeToSplit())) {
                fields.add(inputTuple.getField(name));
            } else if (splitType == AttributeType.TEXT) {
                fields.add(new TextField(piece));
            } else {
                fields.add(new StringField(piece));
            }
        }
        IField[] fieldArray = fields.toArray(new IField[0]);
        outputTupleBuffer.add(new Tuple(outputSchema, fieldArray));
    }
}
Also used : AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) StringField(edu.uci.ics.textdb.api.field.StringField) ArrayList(java.util.ArrayList) TextField(edu.uci.ics.textdb.api.field.TextField) IField(edu.uci.ics.textdb.api.field.IField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 95 with IField

use of edu.uci.ics.textdb.api.field.IField in project textdb by TextDB.

the class NlpSentimentOperator method getNextTuple.

/**
 * Fetches the next tuple from the input operator and returns a copy of it
 * with one extra integer field holding the computed sentiment score.
 *
 * @return the extended tuple, or {@code null} when the cursor is CLOSED or
 *         the input operator returns no tuple
 * @throws TextDBException declared by the operator contract
 */
@Override
public Tuple getNextTuple() throws TextDBException {
    Tuple upstreamTuple = (cursor == CLOSED) ? null : inputOperator.getNextTuple();
    if (upstreamTuple == null) {
        return null;
    }
    // Copy the existing fields, then append the score as the last field.
    List<IField> fieldsWithScore = new ArrayList<>(upstreamTuple.getFields());
    IntegerField sentimentField = new IntegerField(computeSentimentScore(upstreamTuple));
    fieldsWithScore.add(sentimentField);
    return new Tuple(outputSchema, fieldsWithScore);
}
Also used : ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Aggregations

IField (edu.uci.ics.textdb.api.field.IField)140 ArrayList (java.util.ArrayList)110 TextField (edu.uci.ics.textdb.api.field.TextField)105 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)102 Schema (edu.uci.ics.textdb.api.schema.Schema)90 Span (edu.uci.ics.textdb.api.span.Span)85 StringField (edu.uci.ics.textdb.api.field.StringField)84 Attribute (edu.uci.ics.textdb.api.schema.Attribute)84 Test (org.junit.Test)84 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)80 DoubleField (edu.uci.ics.textdb.api.field.DoubleField)68 DateField (edu.uci.ics.textdb.api.field.DateField)64 SimpleDateFormat (java.text.SimpleDateFormat)63 Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary)24 ListField (edu.uci.ics.textdb.api.field.ListField)16 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)9 KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)9 AttributeType (edu.uci.ics.textdb.api.schema.AttributeType)5 IOperator (edu.uci.ics.textdb.api.dataflow.IOperator)4 StorageException (edu.uci.ics.textdb.api.exception.StorageException)4