Search in sources :

Example 1 with Schema

use of edu.uci.ics.texera.workflow.common.tuple.schema.Schema in project textdb by TextDB.

the class PythonUDFOpDescV2 method getOutputSchema.

/**
 * Derives the output schema of this operator from its single input schema.
 *
 * <p>If {@code retainInputColumns} is set, the input schema is added to the output first.
 * Any configured {@code outputColumns} are then appended.
 *
 * @param schemas the input schemas; exactly one is expected (enforced via
 *                {@code Preconditions.checkArgument})
 * @return the schema made of the retained input columns (if any) plus {@code outputColumns}
 * @throws RuntimeException if input columns are retained and an output column name is already
 *                          present in the input schema
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Preconditions.checkArgument(schemas.length == 1);
    Schema inputSchema = schemas[0];
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    // keep the same schema from input
    if (retainInputColumns) {
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        if (retainInputColumns) {
            // a custom column may not reuse the name of a retained input column
            for (Attribute column : outputColumns) {
                if (inputSchema.containsAttribute(column.getName())) {
                    throw new RuntimeException("Column name " + column.getName() + " already exists!");
                }
            }
        }
        // only add here; the schema is built exactly once, at the return below
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 2 with Schema

use of edu.uci.ics.texera.workflow.common.tuple.schema.Schema in project textdb by TextDB.

the class TypeCastingOpDesc method getOutputSchema.

/**
 * Produces the output schema by applying every configured type-casting unit, in order,
 * to the single input schema.
 *
 * @param schemas the input schemas; exactly one is expected
 * @return the input schema after each unit's attribute has been cast to its result type
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Preconditions.checkArgument(schemas.length == 1);
    // start from the input schema and fold each casting unit into it
    Schema castedSchema = schemas[0];
    for (TypeCastingUnit castingUnit : typeCastingUnits) {
        castedSchema = AttributeTypeUtils.SchemaCasting(
                castedSchema, castingUnit.attribute, castingUnit.resultType);
    }
    return castedSchema;
}
Also used : Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 3 with Schema

use of edu.uci.ics.texera.workflow.common.tuple.schema.Schema in project textdb by TextDB.

the class PythonUDFOpDesc method getOutputSchema.

/**
 * Derives the output schema of this operator from its first input schema.
 *
 * <p>The base columns depend on {@code pythonUDFType}: supervised training emits a fixed set of
 * metric columns, unsupervised training emits a single {@code output} column, and every other
 * type keeps the input schema. Any configured {@code outputColumns} are appended afterwards.
 *
 * @param schemas the input schemas; only the first element is read
 * @return the schema built from the type-specific base columns plus {@code outputColumns}
 * @throws RuntimeException if an entry of {@code inputColumns} is missing from the input schema,
 *                          or an output column name is already present in the input schema
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Schema inputSchema = schemas[0];
    // check that every configured input column is present in inputSchema.
    if (inputColumns != null) {
        for (String column : inputColumns) {
            if (!inputSchema.containsAttribute(column)) {
                throw new RuntimeException("No such column:" + column + ".");
            }
        }
    }
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    if (pythonUDFType == PythonUDFType.SupervisedTraining) {
        outputSchemaBuilder.add("class", AttributeType.STRING);
        outputSchemaBuilder.add("precision", AttributeType.STRING);
        outputSchemaBuilder.add("recall", AttributeType.STRING);
        outputSchemaBuilder.add("f1-score", AttributeType.STRING);
        outputSchemaBuilder.add("support", AttributeType.STRING);
    } else if (pythonUDFType == PythonUDFType.UnsupervisedTraining) {
        outputSchemaBuilder.add("output", AttributeType.STRING);
    } else {
        // for pythonUDFType with map and filter, keep the same schema from input
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        // NOTE(review): names are checked against the input schema even for the training types,
        // whose output does not include the input columns - confirm this is intended.
        for (Attribute column : outputColumns) {
            if (inputSchema.containsAttribute(column.getName())) {
                throw new RuntimeException("Column name " + column.getName() + " already exists!");
            }
        }
        // only add here; the schema is built exactly once, at the return below
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 4 with Schema

use of edu.uci.ics.texera.workflow.common.tuple.schema.Schema in project textdb by TextDB.

the class ProgressiveSinkOpDesc method getOutputSchema.

/**
 * Derives the sink's output schema from its single input schema.
 *
 * <p>In {@code SET_SNAPSHOT} mode, the insert/retract flag column is dropped from the output
 * when the input carries it. In every other case the input schema is returned unchanged.
 *
 * @param schemas the input schemas; exactly one is expected
 * @return the input schema, minus the insert/retract flag column where applicable
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Preconditions.checkArgument(schemas.length == 1);
    Schema upstreamSchema = schemas[0];

    // SET_DELTA: output schema is always the same as input schema
    if (!this.outputMode.equals(SET_SNAPSHOT)) {
        return upstreamSchema;
    }

    // SET_SNAPSHOT:
    String flagColumnName = ProgressiveUtils.insertRetractFlagAttr().getName();
    if (!upstreamSchema.containsAttribute(flagColumnName)) {
        // input is insert-only delta: output schema is the same as input schema
        return upstreamSchema;
    }
    // input is insert/retract delta: the flag column is removed in output
    return Schema.newBuilder()
            .add(upstreamSchema)
            .remove(flagColumnName)
            .build();
}
Also used : Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 5 with Schema

use of edu.uci.ics.texera.workflow.common.tuple.schema.Schema in project textdb by TextDB.

the class PythonUDFOpExec method sendConf.

/**
 * Writes the (currently empty) batch of configuration tuples to the {@code Channel.CONF}
 * channel, using a schema with a single string attribute named {@code conf}.
 */
private void sendConf() {
    Attribute confAttribute = new Attribute("conf", AttributeType.STRING);
    Schema schemaForConf = new Schema(Collections.singletonList(confAttribute));
    // TODO: add configurations to be sent
    Queue<Tuple> pendingConfTuples = new LinkedList<>();
    writeArrowStream(
            flightClient,
            pendingConfTuples,
            ArrowUtils.fromTexeraSchema(schemaForConf),
            Channel.CONF,
            batchSize);
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Aggregations

Schema (edu.uci.ics.texera.workflow.common.tuple.schema.Schema): 7, Attribute (edu.uci.ics.texera.workflow.common.tuple.schema.Attribute): 5, Tuple (edu.uci.ics.texera.workflow.common.tuple.Tuple): 3