Search in sources :

Example 1 with Attribute

use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.

the class PythonUDFOpDescV2 method getOutputSchema.

/**
 * Derives the output schema of this operator from its single input schema.
 *
 * <p>When {@code retainInputColumns} is set, the output starts with every attribute of the
 * input schema. Any configured {@code outputColumns} are then appended.
 *
 * @param schemas the input schemas; the precondition check requires exactly one element
 * @return the schema built from the retained input attributes (if any) and the custom
 *         output columns (if any)
 * @throws RuntimeException if input columns are retained and a custom output column has the
 *         same name as an attribute of the input schema
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Preconditions.checkArgument(schemas.length == 1);
    Schema inputSchema = schemas[0];
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    // keep the same schema from input
    if (retainInputColumns) {
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        if (retainInputColumns) {
            // A name clash is only possible when the input attributes are part of the output.
            for (Attribute column : outputColumns) {
                if (inputSchema.containsAttribute(column.getName())) {
                    throw new RuntimeException("Column name " + column.getName() + " already exists!");
                }
            }
        }
        // The schema is built exactly once at the return below, so no intermediate
        // build() (whose result would be discarded) is performed here.
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 2 with Attribute

use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.

the class PythonUDFOpDesc method getOutputSchema.

/**
 * Derives the output schema of this operator from the first input schema.
 *
 * <p>The base of the output depends on {@code pythonUDFType}:
 * <ul>
 *   <li>{@code SupervisedTraining}: the string columns {@code class}, {@code precision},
 *       {@code recall}, {@code f1-score} and {@code support};</li>
 *   <li>{@code UnsupervisedTraining}: a single string column {@code output};</li>
 *   <li>any other type: all attributes of the input schema.</li>
 * </ul>
 * Any configured {@code outputColumns} are appended afterwards.
 *
 * @param schemas the input schemas; only the first element is read
 * @return the schema built from the type-specific base columns and the custom output columns
 * @throws RuntimeException if an entry of {@code inputColumns} is missing from the input
 *         schema, or if a custom output column has the same name as an input attribute
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Schema inputSchema = schemas[0];
    // check if inputColumns are presented in inputSchema.
    if (inputColumns != null) {
        for (String column : inputColumns) {
            if (!inputSchema.containsAttribute(column)) {
                throw new RuntimeException("No such column:" + column + ".");
            }
        }
    }
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    if (pythonUDFType == PythonUDFType.SupervisedTraining) {
        outputSchemaBuilder.add("class", AttributeType.STRING);
        outputSchemaBuilder.add("precision", AttributeType.STRING);
        outputSchemaBuilder.add("recall", AttributeType.STRING);
        outputSchemaBuilder.add("f1-score", AttributeType.STRING);
        outputSchemaBuilder.add("support", AttributeType.STRING);
    } else if (pythonUDFType == PythonUDFType.UnsupervisedTraining) {
        outputSchemaBuilder.add("output", AttributeType.STRING);
    } else {
        // for pythonUDFType with map and filter, keep the same schema from input
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        // NOTE(review): the clash check is made against the input schema for every
        // pythonUDFType, including the training types whose output does not contain the
        // input attributes. Kept as-is to preserve behavior; confirm whether this is intended.
        for (Attribute column : outputColumns) {
            if (inputSchema.containsAttribute(column.getName())) {
                throw new RuntimeException("Column name " + column.getName() + " already exists!");
            }
        }
        // The schema is built exactly once at the return below, so no intermediate
        // build() (whose result would be discarded) is performed here.
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema)

Example 3 with Attribute

use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.

the class PythonUDFOpExec method sendConf.

/**
 * Writes the configuration stream to the flight client on {@code Channel.CONF}.
 *
 * <p>The stream uses a one-column schema ({@code conf}, string). No configuration tuples are
 * produced yet, so the queue handed to {@code writeArrowStream} is empty.
 */
private void sendConf() {
    Attribute confAttribute = new Attribute("conf", AttributeType.STRING);
    Schema schema = new Schema(Collections.singletonList(confAttribute));
    // TODO: add configurations to be sent
    Queue<Tuple> pendingTuples = new LinkedList<>();
    writeArrowStream(
            flightClient,
            pendingTuples,
            ArrowUtils.fromTexeraSchema(schema),
            Channel.CONF,
            batchSize);
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 4 with Attribute

use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.

the class PythonUDFOpExec method sendArgs.

/**
 * Sends the user arguments to the server on {@code Channel.ARGS}.
 *
 * <p>The arguments are collected in this order, skipping any source that is {@code null}:
 * input columns, free-form arguments, names of the custom output columns, outer file paths.
 * Each argument becomes one single-field tuple under a one-column schema ({@code args}, string).
 */
private void sendArgs() {
    List<String> collectedArgs = new ArrayList<>();
    if (inputColumns != null) {
        collectedArgs.addAll(inputColumns);
    }
    if (arguments != null) {
        collectedArgs.addAll(arguments);
    }
    if (outputColumns != null) {
        // Only the column names are forwarded, not the attribute types.
        for (Attribute column : outputColumns) {
            collectedArgs.add(column.getName());
        }
    }
    if (outerFilePaths != null) {
        collectedArgs.addAll(outerFilePaths);
    }

    Attribute argsAttribute = new Attribute("args", AttributeType.STRING);
    Schema argsSchema = new Schema(Collections.singletonList(argsAttribute));
    Queue<Tuple> tupleQueue = new LinkedList<>();
    // Wrap every argument in its own one-field tuple, preserving the collection order.
    collectedArgs.forEach(value -> tupleQueue.add(new Tuple(argsSchema, Collections.singletonList(value))));

    writeArrowStream(
            flightClient,
            tupleQueue,
            ArrowUtils.fromTexeraSchema(argsSchema),
            Channel.ARGS,
            batchSize);
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 5 with Attribute

use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.

the class PieChartOpPartialExec method processTexeraTuple.

/**
 * Buffers incoming tuples as (name, data) pairs and emits them, sorted, once input ends.
 *
 * <p>While tuples arrive (left side of {@code tuple}), the name field and the data field are
 * read from the input tuple, the data is converted to a {@code Double} (parsed when the data
 * column is a string or an integer, taken as-is otherwise), and a two-field tuple is appended
 * to {@code result}. When {@code noDataCol} is set the data is stored as its integer value.
 * An empty iterator is returned in this phase.
 *
 * <p>When the input is exhausted (right side), {@code result} is sorted in descending order of
 * the field at index 1 and an iterator over it is returned.
 *
 * @param tuple either an input tuple or the end-of-input marker
 * @param input the link the tuple arrived on; not read by this method
 * @return an empty iterator while buffering, or an iterator over the sorted buffer at the end
 */
@Override
public Iterator<Tuple> processTexeraTuple(Either<Tuple, InputExhausted> tuple, LinkIdentity input) {
    if (!tuple.isLeft()) {
        // End of input: largest data value first.
        result.sort((first, second) -> {
            double firstValue = noDataCol ? first.getInt(1) : first.getDouble(1);
            double secondValue = noDataCol ? second.getInt(1) : second.getDouble(1);
            return Double.compare(secondValue, firstValue);
        });
        return JavaConverters.asScalaIterator(result.iterator());
    }

    Tuple inputTuple = tuple.left().get();
    String name = inputTuple.getField(nameColumn);

    Schema sourceSchema = inputTuple.getSchema();
    Attribute sourceDataAttribute = sourceSchema.getAttribute(dataColumn);
    AttributeType sourceDataType = sourceDataAttribute.getType();

    // Normalize the data field to a Double regardless of how it is stored in the input.
    Double data;
    if (sourceDataType == AttributeType.STRING) {
        String rawText = inputTuple.getField(dataColumn);
        data = Double.parseDouble(rawText);
    } else if (sourceDataType == AttributeType.INTEGER) {
        Integer rawInteger = inputTuple.getField(dataColumn);
        data = Double.parseDouble(Integer.toString(rawInteger));
    } else {
        data = inputTuple.getField(dataColumn);
    }

    // The output schema reuses the name attribute and a copy of the data attribute
    // (same name and type as in the input schema).
    Attribute dataAttribute = new Attribute(sourceDataAttribute.getName(), sourceDataAttribute.getType());
    Schema pairSchema = new Schema(Arrays.asList(sourceSchema.getAttribute(nameColumn), dataAttribute));

    // Declared as Object on purpose: a conditional expression mixing int and Double
    // would promote the integer to a double.
    Object storedData;
    if (noDataCol) {
        storedData = data.intValue();
    } else {
        storedData = data;
    }
    result.add(Tuple.newBuilder(pairSchema).addSequentially(new Object[] { name, storedData }).build());

    return JavaConverters.asScalaIterator(Collections.emptyIterator());
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Aggregations

Attribute (edu.uci.ics.texera.workflow.common.tuple.schema.Attribute): 5, Schema (edu.uci.ics.texera.workflow.common.tuple.schema.Schema): 5, Tuple (edu.uci.ics.texera.workflow.common.tuple.Tuple): 3