use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.
the class PythonUDFOpDescV2 method getOutputSchema.
/**
 * Computes the output schema of this operator from its single input schema.
 *
 * <p>When {@code retainInputColumns} is set, the output starts with every input
 * attribute; any configured {@code outputColumns} are appended afterwards.
 *
 * @param schemas the input schemas; exactly one is expected
 * @return the resulting output schema
 * @throws IllegalArgumentException if {@code schemas} does not contain exactly one schema
 * @throws RuntimeException if input columns are retained and an output column reuses
 *         the name of an input attribute
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Preconditions.checkArgument(schemas.length == 1);
    Schema inputSchema = schemas[0];
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    // keep the same schema from input
    if (retainInputColumns) {
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        if (retainInputColumns) {
            // A clash is only possible when the input attributes are part of the output.
            for (Attribute column : outputColumns) {
                if (inputSchema.containsAttribute(column.getName())) {
                    throw new RuntimeException("Column name " + column.getName() + " already exists!");
                }
            }
        }
        // Only register the columns here; the schema is built once, at the return below.
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.
the class PythonUDFOpDesc method getOutputSchema.
/**
 * Computes the output schema of this operator from its first input schema.
 *
 * <p>The base columns depend on {@code pythonUDFType}: supervised training emits the
 * fixed report columns ("class", "precision", "recall", "f1-score", "support"),
 * unsupervised training emits a single "output" column, and every other type keeps
 * the input schema. Any configured {@code outputColumns} are appended afterwards.
 *
 * @param schemas the input schemas; only the first one is read
 * @return the resulting output schema
 * @throws RuntimeException if a configured input column is missing from the input
 *         schema, or an output column reuses the name of an input attribute
 */
@Override
public Schema getOutputSchema(Schema[] schemas) {
    Schema inputSchema = schemas[0];
    // check if inputColumns are presented in inputSchema.
    if (inputColumns != null) {
        for (String column : inputColumns) {
            if (!inputSchema.containsAttribute(column)) {
                throw new RuntimeException("No such column:" + column + ".");
            }
        }
    }
    Schema.Builder outputSchemaBuilder = Schema.newBuilder();
    if (pythonUDFType == PythonUDFType.SupervisedTraining) {
        outputSchemaBuilder.add("class", AttributeType.STRING);
        outputSchemaBuilder.add("precision", AttributeType.STRING);
        outputSchemaBuilder.add("recall", AttributeType.STRING);
        outputSchemaBuilder.add("f1-score", AttributeType.STRING);
        outputSchemaBuilder.add("support", AttributeType.STRING);
    } else if (pythonUDFType == PythonUDFType.UnsupervisedTraining) {
        outputSchemaBuilder.add("output", AttributeType.STRING);
    } else {
        // for pythonUDFType with map and filter, keep the same schema from input
        outputSchemaBuilder.add(inputSchema);
    }
    // for any pythonUDFType, it can add custom output columns (attributes).
    if (outputColumns != null) {
        // The name check runs against the input schema for every UDF type.
        for (Attribute column : outputColumns) {
            if (inputSchema.containsAttribute(column.getName())) {
                throw new RuntimeException("Column name " + column.getName() + " already exists!");
            }
        }
        // Only register the columns here; the schema is built once, at the return below.
        outputSchemaBuilder.add(outputColumns);
    }
    return outputSchemaBuilder.build();
}
use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.
the class PythonUDFOpExec method sendConf.
/**
 * Writes the configuration stream on {@code Channel.CONF} through the flight client.
 *
 * <p>The stream uses a single STRING attribute named "conf"; no tuples are
 * queued yet, so the stream currently carries no rows.
 */
private void sendConf() {
    Attribute confAttribute = new Attribute("conf", AttributeType.STRING);
    Schema singleColumnSchema = new Schema(Collections.singletonList(confAttribute));
    // TODO: add configurations to be sent
    Queue<Tuple> pendingTuples = new LinkedList<>();
    writeArrowStream(
            flightClient,
            pendingTuples,
            ArrowUtils.fromTexeraSchema(singleColumnSchema),
            Channel.CONF,
            batchSize);
}
use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.
the class PythonUDFOpExec method sendArgs.
/**
 * Writes the user arguments on {@code Channel.ARGS} through the flight client.
 *
 * <p>The values are flattened into one ordered list (input columns, arguments,
 * output column names, outer file paths; null groups are skipped) and each value
 * becomes one tuple with a single STRING attribute named "args".
 */
private void sendArgs() {
    // One-column schema shared by every argument tuple.
    Attribute argsAttribute = new Attribute("args", AttributeType.STRING);
    Schema singleColumnSchema = new Schema(Collections.singletonList(argsAttribute));

    // Collect the values in the order they are sent.
    List<String> flattened = new ArrayList<>();
    if (inputColumns != null) {
        flattened.addAll(inputColumns);
    }
    if (arguments != null) {
        flattened.addAll(arguments);
    }
    if (outputColumns != null) {
        for (Attribute outputColumn : outputColumns) {
            flattened.add(outputColumn.getName());
        }
    }
    if (outerFilePaths != null) {
        flattened.addAll(outerFilePaths);
    }

    // Wrap each value in its own single-field tuple.
    Queue<Tuple> pendingTuples = new LinkedList<>();
    for (String value : flattened) {
        Tuple wrapped = new Tuple(singleColumnSchema, Collections.singletonList(value));
        pendingTuples.add(wrapped);
    }

    writeArrowStream(
            flightClient,
            pendingTuples,
            ArrowUtils.fromTexeraSchema(singleColumnSchema),
            Channel.ARGS,
            batchSize);
}
use of edu.uci.ics.texera.workflow.common.tuple.schema.Attribute in project textdb by TextDB.
the class PieChartOpPartialExec method processTexeraTuple.
/**
 * Buffers incoming tuples as (name, data) pairs and, once the input is exhausted,
 * emits them ordered by the data value, largest first.
 *
 * <p>While tuples arrive, an empty iterator is returned and the reduced tuple is
 * appended to {@code result}. The data field is read as a Double: STRING values are
 * parsed, INTEGER values are converted, and any other type is read as-is. When
 * {@code noDataCol} is set, the stored value is the truncated int instead.
 *
 * @param tuple either the next input tuple or the end-of-input marker
 * @param input the link the tuple arrived on (not used here)
 * @return an empty iterator per input tuple; the sorted buffer at end of input
 */
@Override
public Iterator<Tuple> processTexeraTuple(Either<Tuple, InputExhausted> tuple, LinkIdentity input) {
    if (!tuple.isLeft()) {
        // End of input: order the buffered tuples by field 1, descending.
        result.sort((left, right) -> {
            double leftValue = noDataCol ? left.getInt(1) : left.getDouble(1);
            double rightValue = noDataCol ? right.getInt(1) : right.getDouble(1);
            return Double.compare(rightValue, leftValue);
        });
        return JavaConverters.asScalaIterator(result.iterator());
    }

    Tuple inputTuple = tuple.left().get();
    Schema sourceSchema = inputTuple.getSchema();
    Attribute sourceDataAttribute = sourceSchema.getAttribute(dataColumn);
    AttributeType sourceDataType = sourceDataAttribute.getType();

    String name = inputTuple.getField(nameColumn);
    Double data;
    if (sourceDataType == AttributeType.STRING) {
        String rawText = inputTuple.getField(dataColumn);
        data = Double.parseDouble(rawText);
    } else if (sourceDataType == AttributeType.INTEGER) {
        Integer rawInt = inputTuple.getField(dataColumn);
        data = rawInt.doubleValue();
    } else {
        data = inputTuple.getField(dataColumn);
    }

    // NOTE(review): the data attribute keeps its original declared type even though
    // the stored value is a Double (or an Integer when noDataCol) - confirm intended.
    Attribute dataAttribute = new Attribute(sourceDataAttribute.getName(), sourceDataType);
    Schema newSchema = new Schema(Arrays.asList(sourceSchema.getAttribute(nameColumn), dataAttribute));

    // Deliberately an if/else: a ternary over (int, Double) would promote the int
    // to double and box it as a Double instead of an Integer.
    Object storedValue;
    if (noDataCol) {
        storedValue = data.intValue();
    } else {
        storedValue = data;
    }
    result.add(Tuple.newBuilder(newSchema).addSequentially(new Object[] { name, storedValue }).build());
    return JavaConverters.asScalaIterator(Collections.emptyIterator());
}