use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.
the class BarChartSink method processTuples.
/**
 * Reads the input operator until it returns {@code null}, then projects every
 * tuple read onto {@code attributes} and stores the projected tuples in
 * {@code result}. Each projected tuple is built against {@code outputSchema}.
 *
 * @throws TexeraException declared by the overridden method
 */
@Override
public void processTuples() throws TexeraException {
    // Phase 1: drain the upstream operator completely before transforming anything.
    List<Tuple> drained = new ArrayList<>();
    for (Tuple next = inputOperator.getNextTuple(); next != null; next = inputOperator.getNextTuple()) {
        drained.add(next);
    }

    // Phase 2: keep only the fields named by the configured attributes, in attribute order.
    result = drained.stream()
            .map(source -> new Tuple(
                    outputSchema,
                    attributes.stream()
                            .map(attribute -> source.getField(attribute.getName()))
                            .toArray(IField[]::new)))
            .collect(Collectors.toList());
}
use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.
the class CSVSink method open.
/**
 * Opens the sink. Does nothing unless the cursor is {@code CLOSED}.
 *
 * Opens the input operator, derives the output schema from the input schema
 * (dropping the {@code _id} attribute, the payload attribute and every
 * attribute of type {@code LIST}), creates a timestamp-named {@code .csv}
 * file under {@code csvIndexDirectory} and writes the header row.
 *
 * @throws TexeraException a {@code DataflowException} wrapping any
 *         {@code IOException} raised while creating the directory or file
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    inputOperator.open();
    inputSchema = inputOperator.getOutputSchema();

    // Keep an attribute only if it is neither _id, nor payload, nor list-typed.
    Attribute[] exportedAttributes = inputSchema.getAttributes().stream()
            .filter(attr -> !(attr.getName().equalsIgnoreCase(SchemaConstants._ID)
                    || attr.getName().equalsIgnoreCase(SchemaConstants.PAYLOAD)
                    || attr.getType().equals(AttributeType.LIST)))
            .toArray(Attribute[]::new);
    outputSchema = new Schema(exportedAttributes);

    // The file name is the current local time, e.g. 20240131-235959.csv
    fileName = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date()) + ".csv";
    File target = new File(csvIndexDirectory.resolve(fileName).toString());

    try {
        if (Files.notExists(csvIndexDirectory)) {
            Files.createDirectories(csvIndexDirectory);
        }
        csvWriter = new CSVWriter(new FileWriter(target));
    } catch (IOException e) {
        throw new DataflowException(e);
    }

    // write csv headers
    String[] header = outputSchema.getAttributeNames().toArray(new String[0]);
    csvWriter.writeNext(header);

    cursor = OPENED;
}
use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.
the class ExcelSink method open.
/**
 * Opens the sink. Does nothing unless the cursor is {@code CLOSED}.
 *
 * Opens the input operator, derives the output schema from the input schema
 * (dropping the {@code _id} attribute, the payload attribute and every
 * attribute of type {@code LIST}), creates a new workbook and an output
 * stream to a timestamp-named {@code .xlsx} file under
 * {@code excelIndexDirectory}, and fills row 0 of a new sheet with the
 * attribute names.
 *
 * @throws TexeraException a {@code DataflowException} wrapping any
 *         {@code IOException} raised while creating the directory or stream
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    inputOperator.open();
    inputSchema = inputOperator.getOutputSchema();

    // Keep an attribute only if it is neither _id, nor payload, nor list-typed.
    Attribute[] exportedAttributes = inputSchema.getAttributes().stream()
            .filter(attr -> !(attr.getName().equalsIgnoreCase(SchemaConstants._ID)
                    || attr.getName().equalsIgnoreCase(SchemaConstants.PAYLOAD)
                    || attr.getType().equals(AttributeType.LIST)))
            .toArray(Attribute[]::new);
    outputSchema = new Schema(exportedAttributes);

    wb = new XSSFWorkbook();

    // The file name is the current local time, e.g. 20240131-235959.xlsx
    fileName = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date()) + ".xlsx";

    try {
        if (Files.notExists(excelIndexDirectory)) {
            Files.createDirectories(excelIndexDirectory);
        }
        fileOut = new FileOutputStream(excelIndexDirectory.resolve(fileName).toString());
    } catch (IOException e) {
        throw new DataflowException(e);
    }

    // Header row: one cell per output attribute, in schema order.
    sheet = wb.createSheet("new sheet");
    Row headerRow = sheet.createRow(0);
    int column = 0;
    for (String attributeName : outputSchema.getAttributeNames()) {
        headerRow.createCell(column).setCellValue(attributeName);
        column++;
    }

    cursor = OPENED;
}
use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.
the class RegexSplitOperator method transformToOutputSchema.
/*
 * Builds the output schema: the single input schema plus one new attribute
 * named resultAttributeName. The new attribute has type LIST when the
 * predicate's output type is ONE_TO_ONE, and type TEXT otherwise.
 *
 * Exactly one input schema must be given; it must contain the predicate's
 * input attribute and must not already contain the result attribute.
 */
public Schema transformToOutputSchema(Schema... inputSchema) throws DataflowException {
    if (inputSchema.length != 1) {
        throw new TexeraException(String.format(ErrorMessages.NUMBER_OF_ARGUMENTS_DOES_NOT_MATCH, 1, inputSchema.length));
    }

    Schema source = inputSchema[0];
    Schema.checkAttributeExists(source, predicate.getInputAttributeName());
    Schema.checkAttributeNotExists(source, predicate.getResultAttributeName());

    AttributeType resultType = predicate.getOutputType() == RegexOutputType.ONE_TO_ONE
            ? AttributeType.LIST
            : AttributeType.TEXT;
    return new Schema.Builder()
            .add(source)
            .add(predicate.getResultAttributeName(), resultType)
            .build();
}
use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.
the class NltkSentimentOperator method open.
/**
 * When this operator is opened, it executes the python script, which constructs a {@code FlightServer}
 * object which is then up and running in the specified address. The operator calls
 * {@code flightClient.doAction(new Action("healthcheck"))} to check the status of the server, and then proceeds if
 * successful (otherwise there will be an exception).
 *
 * <p>At most 5 connection attempts are made. An attempt counts as failed when it throws or when the
 * health check answers with anything other than the expected message. If connecting fails, the
 * Python process started by this call is destroyed before the exception is thrown.
 *
 * <p>Does nothing unless the cursor is {@code CLOSED}.
 *
 * @throws TexeraException a {@code DataflowException} if no input operator is set, or if starting the
 *         Python process or connecting to the Flight server fails
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    // Flight related
    Process serverProcess = null;
    try {
        int portNumber = getFreeLocalPort();
        Location location = new Location(URI.create("grpc+tcp://localhost:" + portNumber));
        List<String> args = new ArrayList<>(Arrays.asList(PYTHON, PYTHONSCRIPT, Integer.toString(portNumber), PicklePath, predicate.getInputAttributeName(), predicate.getResultAttributeName()));
        ProcessBuilder processBuilder = new ProcessBuilder(args).inheritIO();
        // Start Flight server (Python process); keep the handle so it can be destroyed on failure.
        serverProcess = processBuilder.start();
        // Connect to server
        boolean connected = false;
        int tryCount = 0;
        while (!connected && tryCount < 5) {
            try {
                flightClient = FlightClient.builder(rootAllocator, location).build();
                String message = new String(flightClient.doAction(new Action("healthcheck")).next().getBody(), StandardCharsets.UTF_8);
                connected = message.equals("Flight Server is up and running!");
            } catch (Exception e) {
                System.out.println("Flight Client:\tNot connected to the server in this try.");
            }
            if (!connected) {
                // Count every unsuccessful attempt, including an unexpected health-check reply;
                // otherwise such a reply would make this loop spin forever.
                // The client is null when the builder itself failed, so guard the close.
                if (flightClient != null) {
                    flightClient.close();
                    flightClient = null;
                }
                tryCount++;
            }
        }
        if (!connected) {
            throw new DataflowException("Exceeded try limit of 5 when connecting to Flight Server!");
        }
    } catch (Exception e) {
        // Do not leave the spawned Python process running when the operator failed to open.
        if (serverProcess != null) {
            serverProcess.destroy();
        }
        throw new DataflowException(e.getMessage(), e);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    // generate output schema by transforming the input schema
    outputSchema = transformToOutputSchema(inputSchema);
    cursor = OPENED;
    tupleToPythonSchema = convertToArrowSchema(inputSchema);
    innerIndexMap = new HashMap<>();
}
Aggregations