Search in sources :

Example 41 with TexeraException

use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.

the class BarChartSink method processTuples.

/**
 * Consumes every tuple produced by {@code inputOperator} and stores in {@code result}
 * one tuple per input tuple, built against {@code outputSchema} and holding only the
 * fields named by {@code attributes}.
 *
 * @throws TexeraException declared by the sink contract
 */
@Override
public void processTuples() throws TexeraException {
    // Phase 1: drain the upstream operator; a null tuple marks the end of the input.
    List<Tuple> buffered = new ArrayList<>();
    for (Tuple next = inputOperator.getNextTuple(); next != null; next = inputOperator.getNextTuple()) {
        buffered.add(next);
    }
    // Phase 2: project each buffered tuple onto the configured attributes.
    List<Tuple> projected = new ArrayList<>();
    for (Tuple source : buffered) {
        IField[] selectedFields = attributes.stream().map(attribute -> source.getField(attribute.getName())).toArray(IField[]::new);
        projected.add(new Tuple(outputSchema, selectedFields));
    }
    // Publish the result only once every tuple has been converted.
    result = projected;
}
Also used : Tuple(edu.uci.ics.texera.api.tuple.Tuple) VisualizationConstants(edu.uci.ics.texera.dataflow.sink.VisualizationConstants) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) VisualizationOperator(edu.uci.ics.texera.dataflow.sink.VisualizationOperator) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) List(java.util.List) IField(edu.uci.ics.texera.api.field.IField) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) Attribute(edu.uci.ics.texera.api.schema.Attribute) ArrayList(java.util.ArrayList) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 42 with TexeraException

use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.

the class CSVSink method open.

/**
 * Opens the sink: opens the input operator, derives the output schema (the input schema
 * without the {@code _ID} and {@code PAYLOAD} attributes and without any attribute of type
 * {@code LIST}), creates a timestamp-named CSV file inside {@code csvIndexDirectory} and
 * writes the header row. Does nothing when the sink is not in the CLOSED state.
 *
 * @throws TexeraException a {@link DataflowException} wrapping the {@link IOException}
 *         when the directory or the file cannot be created
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    inputOperator.open();
    inputSchema = inputOperator.getOutputSchema();

    // Keep an attribute only if it is none of: the id column, the payload column, a LIST column.
    Attribute[] exportedAttributes = inputSchema.getAttributes().stream()
            .filter(attr -> !(attr.getName().equalsIgnoreCase(SchemaConstants._ID)
                    || attr.getName().equalsIgnoreCase(SchemaConstants.PAYLOAD)
                    || attr.getType().equals(AttributeType.LIST)))
            .toArray(Attribute[]::new);
    outputSchema = new Schema(exportedAttributes);

    // The file is named after the moment the sink is opened.
    fileName = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date()) + ".csv";
    try {
        if (Files.notExists(csvIndexDirectory)) {
            Files.createDirectories(csvIndexDirectory);
        }
        File target = new File(csvIndexDirectory.resolve(fileName).toString());
        csvWriter = new CSVWriter(new FileWriter(target));
    } catch (IOException e) {
        throw new DataflowException(e);
    }

    // write csv headers
    String[] headerRow = outputSchema.getAttributeNames().toArray(new String[0]);
    csvWriter.writeNext(headerRow);
    cursor = OPENED;
}
Also used : Files(java.nio.file.Files) Date(java.util.Date) CSVWriter(au.com.bytecode.opencsv.CSVWriter) Tuple(edu.uci.ics.texera.api.tuple.Tuple) FileWriter(java.io.FileWriter) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) SimpleDateFormat(java.text.SimpleDateFormat) IOException(java.io.IOException) File(java.io.File) ArrayList(java.util.ArrayList) List(java.util.List) SchemaConstants(edu.uci.ics.texera.api.constants.SchemaConstants) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Utils(edu.uci.ics.texera.api.utils.Utils) ISink(edu.uci.ics.texera.api.dataflow.ISink) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) Attribute(edu.uci.ics.texera.api.schema.Attribute) Path(java.nio.file.Path) DateFormat(java.text.DateFormat) Schema(edu.uci.ics.texera.api.schema.Schema) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) FileWriter(java.io.FileWriter) CSVWriter(au.com.bytecode.opencsv.CSVWriter) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IOException(java.io.IOException) SimpleDateFormat(java.text.SimpleDateFormat) File(java.io.File) Date(java.util.Date)

Example 43 with TexeraException

use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.

the class ExcelSink method open.

/**
 * Opens the sink: opens the input operator, derives the output schema (the input schema
 * without the {@code _ID} and {@code PAYLOAD} attributes and without any attribute of type
 * {@code LIST}), creates a new workbook and a timestamp-named {@code .xlsx} output stream
 * inside {@code excelIndexDirectory}, and fills the first sheet row with the attribute names.
 * Does nothing when the sink is not in the CLOSED state.
 *
 * @throws TexeraException a {@link DataflowException} wrapping the {@link IOException}
 *         when the directory or the output file cannot be created
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    inputOperator.open();
    inputSchema = inputOperator.getOutputSchema();

    // Keep an attribute only if it is none of: the id column, the payload column, a LIST column.
    Attribute[] exportedAttributes = inputSchema.getAttributes().stream()
            .filter(attr -> !(attr.getName().equalsIgnoreCase(SchemaConstants._ID)
                    || attr.getName().equalsIgnoreCase(SchemaConstants.PAYLOAD)
                    || attr.getType().equals(AttributeType.LIST)))
            .toArray(Attribute[]::new);
    outputSchema = new Schema(exportedAttributes);

    wb = new XSSFWorkbook();
    // The file is named after the moment the sink is opened.
    fileName = new SimpleDateFormat("yyyyMMdd-HHmmss").format(new Date()) + ".xlsx";
    try {
        if (Files.notExists(excelIndexDirectory)) {
            Files.createDirectories(excelIndexDirectory);
        }
        fileOut = new FileOutputStream(excelIndexDirectory.resolve(fileName).toString());
    } catch (IOException e) {
        throw new DataflowException(e);
    }

    // Row 0 of the sheet is the header: one cell per output attribute, in schema order.
    sheet = wb.createSheet("new sheet");
    Row headerRow = sheet.createRow(0);
    int column = 0;
    for (String attributeName : outputSchema.getAttributeNames()) {
        headerRow.createCell(column).setCellValue(attributeName);
        column++;
    }
    cursor = OPENED;
}
Also used : Date(java.util.Date) Tuple(edu.uci.ics.texera.api.tuple.Tuple) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) SimpleDateFormat(java.text.SimpleDateFormat) ArrayList(java.util.ArrayList) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) SchemaConstants(edu.uci.ics.texera.api.constants.SchemaConstants) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) IField(edu.uci.ics.texera.api.field.IField) ISink(edu.uci.ics.texera.api.dataflow.ISink) Cell(org.apache.poi.ss.usermodel.Cell) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Schema(edu.uci.ics.texera.api.schema.Schema) Attribute(edu.uci.ics.texera.api.schema.Attribute) Path(java.nio.file.Path) DateFormat(java.text.DateFormat) IntegerField(edu.uci.ics.texera.api.field.IntegerField) Sheet(org.apache.poi.ss.usermodel.Sheet) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) DoubleField(edu.uci.ics.texera.api.field.DoubleField) DateField(edu.uci.ics.texera.api.field.DateField) List(java.util.List) Workbook(org.apache.poi.ss.usermodel.Workbook) Utils(edu.uci.ics.texera.api.utils.Utils) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Row(org.apache.poi.ss.usermodel.Row) Schema(edu.uci.ics.texera.api.schema.Schema) IOException(java.io.IOException) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DateFormat(java.text.DateFormat) FileOutputStream(java.io.FileOutputStream) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Row(org.apache.poi.ss.usermodel.Row) SimpleDateFormat(java.text.SimpleDateFormat)

Example 44 with TexeraException

use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.

the class RegexSplitOperator method transformToOutputSchema.

/**
 * Builds the output schema: the single input schema plus one new attribute named
 * {@code predicate.getResultAttributeName()}. The new attribute has type {@code LIST}
 * when the predicate's output type is {@code ONE_TO_ONE}, and type {@code TEXT} otherwise.
 *
 * @param inputSchema the input schemas; exactly one is expected
 * @return the input schema extended with the result attribute
 * @throws TexeraException if the number of input schemas is not exactly one
 */
public Schema transformToOutputSchema(Schema... inputSchema) throws DataflowException {
    if (inputSchema.length != 1) {
        throw new TexeraException(String.format(ErrorMessages.NUMBER_OF_ARGUMENTS_DOES_NOT_MATCH, 1, inputSchema.length));
    }
    Schema source = inputSchema[0];
    // The input attribute must be present and the result attribute must not clash with an existing one.
    Schema.checkAttributeExists(source, predicate.getInputAttributeName());
    Schema.checkAttributeNotExists(source, predicate.getResultAttributeName());

    AttributeType resultType = predicate.getOutputType() == RegexOutputType.ONE_TO_ONE
            ? AttributeType.LIST
            : AttributeType.TEXT;
    return new Schema.Builder().add(source).add(predicate.getResultAttributeName(), resultType).build();
}
Also used : TexeraException(edu.uci.ics.texera.api.exception.TexeraException)

Example 45 with TexeraException

use of edu.uci.ics.texera.api.exception.TexeraException in project textdb by TextDB.

the class NltkSentimentOperator method open.

/**
 * When this operator is opened, it executes the python script, which constructs a {@code FlightServer}
 * object which is then up and running in the specified address. The operator calls
 * {@code flightClient.doAction(new Action("healthcheck"))} to check the status of the server, and then proceeds if
 * successful (otherwise there will be an exception).
 *
 * <p>Up to 5 connection attempts are made. An attempt counts as failed both when it throws and when
 * the server answers the health check with an unexpected message; the client of a failed attempt is
 * closed before the next attempt. If the connection cannot be established, the Python process that
 * was started is destroyed before the exception is propagated.
 *
 * <p>Does nothing when the operator is not in the CLOSED state.
 *
 * @throws TexeraException a {@link DataflowException} if no input operator is set, or if starting the
 *         Python process or connecting to the Flight server fails
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    // Flight related
    Process serverProcess = null;
    try {
        int portNumber = getFreeLocalPort();
        Location location = new Location(URI.create("grpc+tcp://localhost:" + portNumber));
        List<String> args = new ArrayList<>(Arrays.asList(PYTHON, PYTHONSCRIPT, Integer.toString(portNumber), PicklePath, predicate.getInputAttributeName(), predicate.getResultAttributeName()));
        ProcessBuilder processBuilder = new ProcessBuilder(args).inheritIO();
        // Start Flight server (Python process); keep the handle so it can be destroyed on failure.
        serverProcess = processBuilder.start();
        // Connect to server
        boolean connected = false;
        int tryCount = 0;
        while (!connected && tryCount < 5) {
            try {
                flightClient = FlightClient.builder(rootAllocator, location).build();
                String message = new String(flightClient.doAction(new Action("healthcheck")).next().getBody(), StandardCharsets.UTF_8);
                connected = message.equals("Flight Server is up and running!");
            } catch (Exception e) {
                // Deliberately best-effort: a failed attempt is retried below.
                connected = false;
            }
            if (!connected) {
                // Covers both a thrown exception and an unexpected health-check reply, so the
                // loop always makes progress and never spins forever on a wrong reply.
                System.out.println("Flight Client:\tNot connected to the server in this try.");
                // build() itself may have failed, in which case there is no client to close.
                if (flightClient != null) {
                    flightClient.close();
                    flightClient = null;
                }
                tryCount++;
            }
        }
        if (!connected) {
            throw new DataflowException("Exceeded try limit of 5 when connecting to Flight Server!");
        }
    } catch (Exception e) {
        // Do not leave an orphaned Python process behind when the operator failed to open.
        if (serverProcess != null) {
            serverProcess.destroy();
        }
        if (e instanceof InterruptedException) {
            // Preserve the interrupt status for callers further up the stack.
            Thread.currentThread().interrupt();
        }
        throw new DataflowException(e.getMessage(), e);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    // generate output schema by transforming the input schema
    outputSchema = transformToOutputSchema(inputSchema);
    tupleToPythonSchema = convertToArrowSchema(inputSchema);
    innerIndexMap = new HashMap<>();
    // Mark the operator as opened only after all of its state has been initialized.
    cursor = OPENED;
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IOException(java.io.IOException)

Aggregations

TexeraException (edu.uci.ics.texera.api.exception.TexeraException)46 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)27 Schema (edu.uci.ics.texera.api.schema.Schema)20 IOException (java.io.IOException)20 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)17 Tuple (edu.uci.ics.texera.api.tuple.Tuple)17 Attribute (edu.uci.ics.texera.api.schema.Attribute)14 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)12 ArrayList (java.util.ArrayList)10 List (java.util.List)10 ErrorMessages (edu.uci.ics.texera.api.constants.ErrorMessages)9 IField (edu.uci.ics.texera.api.field.IField)8 JsonNode (com.fasterxml.jackson.databind.JsonNode)7 TexeraWebException (edu.uci.ics.texera.web.TexeraWebException)7 Collectors (java.util.stream.Collectors)7 SchemaConstants (edu.uci.ics.texera.api.constants.SchemaConstants)6 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)6 ISink (edu.uci.ics.texera.api.dataflow.ISink)5 IntegerField (edu.uci.ics.texera.api.field.IntegerField)4 LogicalPlan (edu.uci.ics.texera.dataflow.plangen.LogicalPlan)4