Search in sources :

Example 16 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class RegexSplitOperator method setUp.

/**
 * Prepares this operator: derives the output schema from the input operator's
 * schema and verifies that the attribute named by the predicate is textual.
 *
 * @throws DataflowException if the input attribute's type is neither
 *         {@code STRING} nor {@code TEXT}
 */
@Override
protected void setUp() throws DataflowException {
    final Schema upstreamSchema = inputOperator.getOutputSchema();

    // The output schema is computed first; its shape depends on the chosen
    // output format (OneToOne vs. OneToMany).
    this.outputSchema = transformToOutputSchema(upstreamSchema);

    // Only STRING and TEXT attributes are accepted as input.
    final String attributeName = predicate.getInputAttributeName();
    final AttributeType actualType = upstreamSchema.getAttribute(attributeName).getType();
    if (actualType.equals(AttributeType.STRING) || actualType.equals(AttributeType.TEXT)) {
        return;
    }
    throw new DataflowException(String.format("input attribute %s must have type String or Text, its actual type is %s", attributeName, actualType));
}
Also used : AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 17 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class MysqlSource method open.

/**
 * Opens this MySQL source: loads the JDBC driver, connects to the database
 * described by the predicate, marks the connection read-only, and builds the
 * output schema from the column metadata of the configured table.
 *
 * <p>Calling this method when the source is already opened is a no-op.
 *
 * @throws TexeraException (as a {@link DataflowException}) if the driver cannot
 *         be loaded or any JDBC call fails
 */
@Override
public void open() throws TexeraException {
    if (status == OPENED) {
        return;
    }
    // JDBC connection
    try {
        Class.forName("com.mysql.cj.jdbc.Driver").newInstance();
        String url = "jdbc:mysql://" + predicate.getHost() + ":" + predicate.getPort() + "/" + predicate.getDatabase() + "?autoReconnect=true&useSSL=true";
        this.connection = DriverManager.getConnection(url, predicate.getUsername(), predicate.getPassword());
        // set to readonly to improve efficiency
        connection.setReadOnly(true);
        DatabaseMetaData databaseMetaData = connection.getMetaData();
        // The metadata ResultSet is a JDBC resource of its own; close it as soon
        // as the columns have been read instead of leaving it to the connection.
        try (ResultSet columns = databaseMetaData.getColumns(null, null, predicate.getTable(), null)) {
            while (columns.next()) {
                String columnName = columns.getString("COLUMN_NAME");
                int datatype = columns.getInt("DATA_TYPE");
                this.schemaBuilder.add(columnName, sqlTypeToAttributeType(datatype));
            }
        }
        this.outputSchema = this.schemaBuilder.build();
        status = OPENED;
    } catch (SQLException | InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        // NOTE(review): a connection obtained before the failure is left as-is here;
        // confirm whether close() releases it when status is not OPENED.
        throw new DataflowException("MysqlSource failed to connect to mysql database. " + e.getMessage());
    }
}

/**
 * Maps a {@link java.sql.Types} constant to the attribute type used in the
 * output schema. Any type without an explicit mapping is treated as
 * {@code STRING}.
 *
 * @param sqlType a type code as reported in the {@code DATA_TYPE} metadata column
 * @return the corresponding attribute type, never {@code null}
 */
private static AttributeType sqlTypeToAttributeType(int sqlType) {
    switch (sqlType) {
        case Types.SMALLINT:    // 5
        case Types.INTEGER:     // 4
        case Types.BINARY:      // -2
            return AttributeType.INTEGER;
        case Types.FLOAT:       // 6
        case Types.REAL:        // 7
        case Types.DOUBLE:      // 8
        case Types.NUMERIC:     // 3
            return AttributeType.DOUBLE;
        case Types.DATE:        // 91
            return AttributeType.DATE;
        case Types.TIME:        // 92
        case Types.TIMESTAMP:   // 93
            return AttributeType.DATETIME;
        case Types.TINYINT:     // -6
        case Types.BOOLEAN:     // 16
        case Types.BIT:         // -7
            return AttributeType.BOOLEAN;
        case Types.LONGVARCHAR: // -1
            return AttributeType.TEXT;
        // BIGINT is mapped to STRING, not INTEGER
        // (presumably to avoid 64-bit overflow — TODO confirm).
        case Types.BIGINT:      // -5
        case Types.CHAR:        // 1
        case Types.VARCHAR:     // 12
        case Types.NULL:        // 0
        case Types.OTHER:       // 1111
        default:
            return AttributeType.STRING;
    }
}
Also used : AttributeType(edu.uci.ics.texera.api.schema.AttributeType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 18 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class PieChartSink method open.

/**
 * Opens this pie-chart sink: opens the input operator, validates the name
 * column, data column and prune ratio from the predicate, and builds the
 * two-column output schema (name column, data column).
 *
 * <p>Does nothing unless the cursor is in the {@code CLOSED} state.
 *
 * @throws TexeraException if no input operator is set; a
 *         {@link DataflowException} if the name column is not STRING/TEXT, the
 *         data column is not INTEGER/DOUBLE, or the prune ratio is missing,
 *         NaN, or outside the closed interval [0, 1]
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new TexeraException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();
    Schema schema = inputOperator.getOutputSchema();
    Attribute nameColumn = schema.getAttribute(predicate.getNameColumn());
    AttributeType nameColumnType = nameColumn.getType();
    if (!nameColumnType.equals(AttributeType.STRING) && !nameColumnType.equals(AttributeType.TEXT)) {
        throw new DataflowException("Type of name column should be string or text.");
    }
    Attribute dataColumn = schema.getAttribute(predicate.getDataColumn());
    AttributeType dataColumnType = dataColumn.getType();
    if (!dataColumnType.equals(AttributeType.DOUBLE) && !dataColumnType.equals(AttributeType.INTEGER)) {
        throw new DataflowException("Type of data column should be integer or double.");
    }
    Double ratio = predicate.getPruneRatio();
    // Expressed as a negated in-range test so that NaN (for which every
    // comparison is false) is rejected; the null check avoids an unboxing NPE.
    if (ratio == null || !(ratio >= 0 && ratio <= 1)) {
        // Both bounds are accepted, so the message uses closed-interval notation.
        throw new DataflowException("Ratio should be in [0, 1].");
    }
    outputSchema = new Schema.Builder().add(nameColumn, dataColumn).build();
    cursor = OPENED;
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) TexeraException(edu.uci.ics.texera.api.exception.TexeraException)

Example 19 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class PieChartSink method buildOtherNameField.

/**
 * Creates the field holding the literal label "Other", using the same field
 * kind as the name column of the input operator's output schema.
 *
 * @return a {@code StringField} when the name column is of type STRING,
 *         otherwise a {@code TextField}
 */
private IField buildOtherNameField() {
    final String nameColumnName = predicate.getNameColumn();
    final AttributeType labelType = inputOperator.getOutputSchema().getAttribute(nameColumnName).getType();
    return labelType.equals(AttributeType.STRING)
            ? new StringField("Other")
            : new TextField("Other");
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField)

Example 20 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class BarChartSink method open.

/**
 * Opens this bar-chart sink: opens the input operator, checks that the name
 * column is STRING/TEXT and that every data column is INTEGER/DOUBLE, collects
 * those attributes, and builds the output schema from them.
 *
 * <p>Returns immediately unless the cursor is in the {@code CLOSED} state.
 *
 * @throws TexeraException if no input operator is set; a
 *         {@link DataflowException} if a column has an unsupported type
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new TexeraException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();

    final Schema inputSchema = inputOperator.getOutputSchema();

    // The name (label) column must be textual.
    final Attribute labelAttribute = inputSchema.getAttribute(predicate.getNameColumn());
    final AttributeType labelType = labelAttribute.getType();
    final boolean labelIsTextual = labelType.equals(AttributeType.STRING) || labelType.equals(AttributeType.TEXT);
    if (!labelIsTextual) {
        throw new DataflowException("Type of name column should be string or text.");
    }
    attributes.add(labelAttribute);

    // Every data column must be numeric; each one is collected in predicate order.
    for (String dataColumnName : predicate.getDataColumn()) {
        final Attribute valueAttribute = inputSchema.getAttribute(dataColumnName);
        final AttributeType valueType = valueAttribute.getType();
        final boolean valueIsNumeric = valueType.equals(AttributeType.DOUBLE) || valueType.equals(AttributeType.INTEGER);
        if (!valueIsNumeric) {
            throw new DataflowException("Type of data column should be integer or double.");
        }
        attributes.add(valueAttribute);
    }

    outputSchema = new Schema.Builder().add(attributes).build();
    cursor = OPENED;
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) TexeraException(edu.uci.ics.texera.api.exception.TexeraException)

Aggregations

AttributeType (edu.uci.ics.texera.api.schema.AttributeType)31 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)21 Schema (edu.uci.ics.texera.api.schema.Schema)16 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)14 Attribute (edu.uci.ics.texera.api.schema.Attribute)13 Span (edu.uci.ics.texera.api.span.Span)10 Tuple (edu.uci.ics.texera.api.tuple.Tuple)7 SchemaConstants (edu.uci.ics.texera.api.constants.SchemaConstants)6 ListField (edu.uci.ics.texera.api.field.ListField)6 ArrayList (java.util.ArrayList)6 Collectors (java.util.stream.Collectors)6 ErrorMessages (edu.uci.ics.texera.api.constants.ErrorMessages)5 AbstractSingleInputOperator (edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator)5 DataflowUtils (edu.uci.ics.texera.dataflow.utils.DataflowUtils)5 IField (edu.uci.ics.texera.api.field.IField)4 java.util (java.util)4 Matcher (java.util.regex.Matcher)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)2 KeywordMatchingType (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatchingType)2