Search in sources:

Example 26 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class SimilarityJoinPredicate method generateOutputSchema.

/**
 * Builds the output schema of the join from the schemas of its two inputs.
 *
 * <p>Layout of the result, in order:
 * <ol>
 *   <li>{@code SchemaConstants._ID_ATTRIBUTE};</li>
 *   <li>every inner attribute, renamed with {@code INNER_PREFIX};</li>
 *   <li>every outer attribute, renamed with {@code OUTER_PREFIX};</li>
 *   <li>{@code SchemaConstants.SPAN_LIST_ATTRIBUTE};</li>
 *   <li>{@code SchemaConstants.PAYLOAD_ATTRIBUTE}, only if at least one input has a payload.</li>
 * </ol>
 * The _id, spanList and payload attributes of the inputs are never copied with a prefix.
 *
 * @param innerOperatorSchema schema of the inner input
 * @param outerOperatorSchema schema of the outer input
 * @return the combined schema described above
 * @throws DataflowException declared by the signature; not thrown directly by this body
 */
@Override
public Schema generateOutputSchema(Schema innerOperatorSchema, Schema outerOperatorSchema) throws DataflowException {
    List<Attribute> joined = new ArrayList<>();

    // the _ID attribute always leads the output schema
    joined.add(SchemaConstants._ID_ATTRIBUTE);

    // inner side: copy every non-reserved attribute under the inner prefix
    for (Attribute innerAttr : innerOperatorSchema.getAttributes()) {
        String name = innerAttr.getName();
        AttributeType type = innerAttr.getType();
        boolean reserved = name.equals(SchemaConstants._ID)
                || name.equals(SchemaConstants.SPAN_LIST)
                || name.equals(SchemaConstants.PAYLOAD);
        if (!reserved) {
            joined.add(new Attribute(INNER_PREFIX + name, type));
        }
    }

    // outer side: same treatment, under the outer prefix
    for (Attribute outerAttr : outerOperatorSchema.getAttributes()) {
        String name = outerAttr.getName();
        AttributeType type = outerAttr.getType();
        boolean reserved = name.equals(SchemaConstants._ID)
                || name.equals(SchemaConstants.SPAN_LIST)
                || name.equals(SchemaConstants.PAYLOAD);
        if (!reserved) {
            joined.add(new Attribute(OUTER_PREFIX + name, type));
        }
    }

    // a single, un-prefixed span list follows the copied attributes
    joined.add(SchemaConstants.SPAN_LIST_ATTRIBUTE);

    // a single, un-prefixed payload is appended when either input carries one
    boolean eitherHasPayload = innerOperatorSchema.containsAttribute(SchemaConstants.PAYLOAD)
            || outerOperatorSchema.containsAttribute(SchemaConstants.PAYLOAD);
    if (eitherHasPayload) {
        joined.add(SchemaConstants.PAYLOAD_ATTRIBUTE);
    }

    return new Schema(joined.toArray(new Attribute[0]));
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema)

Example 27 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class EmojiSentimentOperator method transformToOutputSchema.

/**
 * Validates the input schema and delegates to {@code transformSchema} to produce the output schema.
 *
 * @param inputSchema the input schemas; exactly one is expected
 * @return the result of {@code transformSchema} applied to the single input schema
 * @throws TexeraException if the number of schemas is not one, if the predicate's input
 *         attribute is missing from the schema, or if that attribute is neither STRING nor TEXT
 */
public Schema transformToOutputSchema(Schema... inputSchema) {
    if (inputSchema.length != 1) {
        throw new TexeraException(String.format(ErrorMessages.NUMBER_OF_ARGUMENTS_DOES_NOT_MATCH, 1, inputSchema.length));
    }

    Schema schema = inputSchema[0];
    String attributeName = predicate.getInputAttributeName();

    // the attribute named by the predicate must exist in the schema
    if (!schema.containsAttribute(attributeName)) {
        throw new TexeraException(String.format("input attribute %s is not in the input schema %s", attributeName, schema.getAttributeNames()));
    }

    // only STRING and TEXT attributes are accepted
    AttributeType actualType = schema.getAttribute(attributeName).getType();
    if (!actualType.equals(AttributeType.STRING) && !actualType.equals(AttributeType.TEXT)) {
        throw new TexeraException(String.format("input attribute %s must have type String or Text, its actual type is %s", attributeName, actualType));
    }

    return transformSchema(schema);
}
Also used : AttributeType(edu.uci.ics.texera.api.schema.AttributeType) TexeraException(edu.uci.ics.texera.api.exception.TexeraException)

Example 28 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class KeywordMatcherSourceOperator method buildConjunctionQuery.

/**
 * Builds the Lucene query for conjunction keyword matching over the predicate's attributes.
 *
 * <p>Each attribute contributes one SHOULD clause to the returned boolean query:
 * <ul>
 *   <li>STRING attribute: a single term query on the predicate's query string as given;</li>
 *   <li>TEXT attribute: a nested boolean query in which every token of
 *       {@code queryTokenSet}, lower-cased, is a MUST clause.</li>
 * </ul>
 *
 * @return the combined boolean query
 * @throws DataflowException if any attribute has a type other than STRING or TEXT
 */
private Query buildConjunctionQuery() throws DataflowException {
    BooleanQuery.Builder anyAttributeBuilder = new BooleanQuery.Builder();

    for (String attributeName : this.predicate.getAttributeNames()) {
        AttributeType type = this.inputSchema.getAttribute(attributeName).getType();

        if (type == AttributeType.STRING) {
            // STRING: one term holding the predicate's query string
            Query wholeQueryTerm = new TermQuery(new Term(attributeName, predicate.getQuery()));
            anyAttributeBuilder.add(wholeQueryTerm, BooleanClause.Occur.SHOULD);
        } else if (type == AttributeType.TEXT) {
            // TEXT: every query token must be present in this attribute
            BooleanQuery.Builder allTokensBuilder = new BooleanQuery.Builder();
            for (String token : queryTokenSet) {
                Query tokenTerm = new TermQuery(new Term(attributeName, token.toLowerCase()));
                allTokensBuilder.add(tokenTerm, BooleanClause.Occur.MUST);
            }
            anyAttributeBuilder.add(allTokensBuilder.build(), BooleanClause.Occur.SHOULD);
        } else {
            // types other than TEXT and STRING: throw Exception for now
            throw new DataflowException("KeywordPredicate: Fields other than STRING and TEXT are not supported yet");
        }
    }

    return anyAttributeBuilder.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Term(org.apache.lucene.index.Term)

Example 29 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class MysqlSource method getNextTuple.

/**
 * Returns the next row of the query result as a tuple, sending the SQL query on the first call.
 *
 * <p>Every column named in {@code outputSchema} is read as a string and converted according to
 * the attribute type:
 * <ul>
 *   <li>STRING / TEXT: SQL NULL becomes the empty string;</li>
 *   <li>INTEGER / DOUBLE: parsed from the string; SQL NULL yields a field holding {@code null};</li>
 *   <li>DATE / DATETIME: the raw string is handed to the field constructor (DATETIME with the
 *       {@code yyyy-MM-dd HH:mm:ss} formatter);</li>
 *   <li>BOOLEAN: emitted as a string field, {@code "false"} for {@code "0"} or SQL NULL and
 *       {@code "true"} for anything else.</li>
 * </ul>
 *
 * @return the next tuple, or {@code null} when the result set has no more rows
 * @throws TexeraException a {@link DataflowException} if the operator status is CLOSED or a
 *         {@link SQLException} occurs while querying or reading the row
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (status == CLOSED) {
        throw new DataflowException(ErrorMessages.OPERATOR_NOT_OPENED);
    }
    try {
        if (!querySent) {
            // The statement is deliberately not closed here: closing a JDBC Statement also
            // closes its ResultSet, which later calls still read from.
            PreparedStatement ps = this.connection.prepareStatement(generateSqlQuery(predicate));
            // Parameters are bound positionally; presumably this order mirrors the placeholders
            // emitted by generateSqlQuery (keyword, limit, offset) - verify against that method.
            int curIndex = 1;
            if (!predicate.getColumn().isEmpty() && !predicate.getKeywords().isEmpty()) {
                ps.setString(curIndex, predicate.getKeywords());
                curIndex += 1;
            }
            if (predicate.getLimit() != Integer.MAX_VALUE) {
                ps.setObject(curIndex, predicate.getLimit(), Types.INTEGER);
                curIndex += 1;
            }
            if (predicate.getOffset() != 0) {
                ps.setObject(curIndex, predicate.getOffset(), Types.INTEGER);
            }
            this.rs = ps.executeQuery();
            querySent = true;
        }
        // At most one row is consumed per call.
        if (rs.next()) {
            List<IField> row = new ArrayList<>();
            for (Attribute a : this.outputSchema.getAttributes()) {
                AttributeType attrType = a.getType();
                String value = rs.getString(a.getName());
                switch (attrType) {
                    case STRING:
                        value = value == null ? "" : value;
                        row.add(new StringField(value));
                        break;
                    case TEXT:
                        value = value == null ? "" : value;
                        row.add(new TextField(value));
                        break;
                    case INTEGER:
                        // allowing null value Integer to be in the workflow
                        if (value != null) {
                            row.add(new IntegerField(Integer.valueOf(value)));
                        } else {
                            row.add(new IntegerField(null));
                        }
                        break;
                    case DOUBLE:
                        // null is allowed through for doubles as well
                        if (value != null) {
                            row.add(new DoubleField(Double.valueOf(value)));
                        } else {
                            row.add(new DoubleField(null));
                        }
                        break;
                    case DATE:
                        row.add(new DateField(value));
                        break;
                    case DATETIME:
                        // a formatter is needed because
                        // mysql format is    yyyy-MM-dd HH:mm:ss
                        // but java format is yyyy-MM-ddTHH:mm:ss by default
                        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
                        row.add(new DateTimeField(value, formatter));
                        break;
                    case BOOLEAN:
                        // Booleans are emitted as the strings "true"/"false". SQL NULL is mapped
                        // to "false" instead of failing with a NullPointerException.
                        if (value == null || value.equals("0")) {
                            row.add(new StringField("false"));
                        } else {
                            row.add(new StringField("true"));
                        }
                        break;
                    default:
                        // Any other attribute type contributes no field to the row.
                        break;
                }
            }
            IField[] iFieldArray = row.toArray(new IField[0]);
            return new Tuple(this.outputSchema, iFieldArray);
        }
    } catch (SQLException e) {
        // NOTE(review): only the message is propagated, so the SQL stack trace is lost. If
        // DataflowException offers a (String, Throwable) constructor, pass e as the cause.
        throw new DataflowException(e.getMessage());
    }
    return null;
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) ArrayList(java.util.ArrayList) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) DateTimeFormatter(java.time.format.DateTimeFormatter) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 30 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class PieChartSink method buildOtherDataField.

/**
 * Wraps {@code value} in a field matching the type of the predicate's data column.
 *
 * @param value the numeric value to wrap
 * @return an {@code IntegerField} holding {@code value} cast to {@code int} when the data
 *         column's type is INTEGER; otherwise a {@code DoubleField} holding {@code value}
 */
private IField buildOtherDataField(double value) {
    AttributeType columnType = inputOperator.getOutputSchema()
            .getAttribute(predicate.getDataColumn())
            .getType();

    // every non-INTEGER column type is represented as a double
    if (!columnType.equals(AttributeType.INTEGER)) {
        return new DoubleField(value);
    }
    // the cast drops any fractional part of the value
    return new IntegerField((int) value);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) IntegerField(edu.uci.ics.texera.api.field.IntegerField) DoubleField(edu.uci.ics.texera.api.field.DoubleField)

Aggregations

AttributeType (edu.uci.ics.texera.api.schema.AttributeType)31 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)21 Schema (edu.uci.ics.texera.api.schema.Schema)16 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)14 Attribute (edu.uci.ics.texera.api.schema.Attribute)13 Span (edu.uci.ics.texera.api.span.Span)10 Tuple (edu.uci.ics.texera.api.tuple.Tuple)7 SchemaConstants (edu.uci.ics.texera.api.constants.SchemaConstants)6 ListField (edu.uci.ics.texera.api.field.ListField)6 ArrayList (java.util.ArrayList)6 Collectors (java.util.stream.Collectors)6 ErrorMessages (edu.uci.ics.texera.api.constants.ErrorMessages)5 AbstractSingleInputOperator (edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator)5 DataflowUtils (edu.uci.ics.texera.dataflow.utils.DataflowUtils)5 IField (edu.uci.ics.texera.api.field.IField)4 java.util (java.util)4 Matcher (java.util.regex.Matcher)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)2 KeywordMatchingType (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatchingType)2