Search in sources :

Example 1 with Tuple

use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.

the class PieChartOpFinalExec method processTexeraTuple.

/**
 * Buffers incoming tuples and, once the input is exhausted, produces the pruned result rows.
 *
 * <p>For a left value, the tuple's value at index 1 (read with {@code getInt} when
 * {@code noDataCol} is set, otherwise with {@code getDouble}) is added to {@code sum}, the tuple
 * is appended to {@code tempList}, its schema is stored in {@code resultSchema} if none has been
 * stored yet, and an empty iterator is returned.
 *
 * <p>Otherwise {@code tempList} is sorted by that value in descending order and its tuples are
 * copied into {@code resultList} until the cumulative value divided by {@code sum} exceeds
 * {@code pruneRatio}. At that point one extra row labelled "Other" holding
 * {@code sum - cumulative} is appended and copying stops. Note that this row is appended even
 * when the threshold is first crossed on the last tuple, in which case the remainder may be 0.
 *
 * @param tuple either a data tuple (left) or an {@code InputExhausted} value
 * @param input not read by this method
 * @return an empty iterator for a data tuple; otherwise an iterator over {@code resultList}
 */
@Override
public scala.collection.Iterator<Tuple> processTexeraTuple(Either<Tuple, InputExhausted> tuple, LinkIdentity input) {
    if (tuple.isLeft()) {
        final Tuple incoming = tuple.left().get();
        if (noDataCol) {
            sum += incoming.getInt(1);
        } else {
            sum += incoming.getDouble(1);
        }
        tempList.add(incoming);
        if (resultSchema == null) {
            resultSchema = incoming.getSchema();
        }
        return JavaConverters.asScalaIterator(Collections.emptyIterator());
    }

    // Largest value first, so the biggest rows are kept before pruning kicks in.
    tempList.sort((first, second) -> {
        final double firstValue = noDataCol ? first.getInt(1) : first.getDouble(1);
        final double secondValue = noDataCol ? second.getInt(1) : second.getDouble(1);
        return Double.compare(secondValue, firstValue);
    });

    // Walk the sorted rows; as soon as the cumulative share exceeds pruneRatio,
    // fold everything that is left into a single "Other" row and stop.
    double cumulative = 0.0;
    for (Tuple row : tempList) {
        cumulative += noDataCol ? row.getInt(1) : row.getDouble(1);
        resultList.add(row);
        if (cumulative / sum > pruneRatio) {
            // Boxed as Integer for the no-data-column case and as Double otherwise.
            final Object remainder;
            if (noDataCol) {
                remainder = (int) (sum - cumulative);
            } else {
                remainder = sum - cumulative;
            }
            resultList.add(Tuple.newBuilder(resultSchema).addSequentially(new Object[] { "Other", remainder }).build());
            break;
        }
    }
    return JavaConverters.asScalaIterator(resultList.iterator());
}
Also used : Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 2 with Tuple

use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.

the class LinearRegressionOpExec method predict.

/**
 * Computes one prediction per tuple of the minibatch and stores them in {@code results}.
 *
 * <p>{@code results} is replaced by a new array with the minibatch's length. Entry {@code i}
 * is {@code w_current * x + b_current}, where {@code x} is the {@code xAttr} field of tuple
 * {@code i} converted with {@code Double.valueOf}.
 *
 * @param minibatch the tuples to produce predictions for
 */
@Override
public void predict(Tuple[] minibatch) {
    results = new Double[minibatch.length];
    for (int i = 0; i < minibatch.length; i++) {
        Double x = Double.valueOf(minibatch[i].getField(xAttr));
        results[i] = (w_current * x) + b_current;
    }
}
Also used : Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 3 with Tuple

use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.

the class WordCloudOpPartialExec method calculateWordCount.

/**
 * Counts how often each term occurs across the given texts, skipping English stop words.
 *
 * <p>Each text is tokenized with {@code luceneAnalyzer}; every token's character span is cut out
 * of the original text, lower-cased, and counted unless it is contained in
 * {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}. One tuple of (term, count) is built against
 * {@code partialAggregateSchema} for every distinct term.
 *
 * @param texts the texts to tokenize
 * @param luceneAnalyzer the analyzer used to obtain a token stream for each text
 * @return one (term, count) tuple per distinct non-stop-word term
 * @throws Exception if tokenization fails
 */
private static List<Tuple> calculateWordCount(List<String> texts, Analyzer luceneAnalyzer) throws Exception {
    Map<String, Integer> termFreqMap = new HashMap<>();
    for (String text : texts) {
        // try-with-resources closes the stream even when tokenization throws; an unclosed
        // stream would otherwise be leaked.
        try (TokenStream tokenStream = luceneAnalyzer.tokenStream(null, new StringReader(text))) {
            OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                int charStart = offsetAttribute.startOffset();
                int charEnd = offsetAttribute.endOffset();
                // Locale.ROOT keeps lower-casing independent of the JVM's default locale,
                // so terms compare consistently against the English stop-word set.
                String termStr = text.substring(charStart, charEnd).toLowerCase(java.util.Locale.ROOT);
                if (!EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.contains(termStr)) {
                    termFreqMap.merge(termStr, 1, Integer::sum);
                }
            }
            // Complete the consumer workflow (reset, incrementToken, end, close).
            tokenStream.end();
        }
    }
    List<Tuple> termFreqTuples = new ArrayList<>();
    for (Map.Entry<String, Integer> e : termFreqMap.entrySet()) {
        termFreqTuples.add(Tuple.newBuilder(partialAggregateSchema).addSequentially(new Object[] { e.getKey(), e.getValue() }).build());
    }
    return termFreqTuples;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 4 with Tuple

use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.

the class PythonUDFOpExec method sendConf.

/**
 * Writes the configuration stream on {@code Channel.CONF}.
 *
 * <p>The stream uses a schema with a single STRING attribute named "conf". The tuple queue is
 * currently always empty, so only the schema and an empty batch are handed to
 * {@code writeArrowStream}.
 */
private void sendConf() {
    final Attribute confAttribute = new Attribute("conf", AttributeType.STRING);
    final Schema schema = new Schema(Collections.singletonList(confAttribute));
    // TODO: add configurations to be sent
    final Queue<Tuple> pending = new LinkedList<>();
    writeArrowStream(flightClient, pending, ArrowUtils.fromTexeraSchema(schema), Channel.CONF, batchSize);
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Example 5 with Tuple

use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.

the class PythonUDFOpExec method sendArgs.

/**
 * Sends the user arguments to the server on {@code Channel.ARGS}.
 *
 * <p>The arguments are gathered in this order, skipping any source that is {@code null}:
 * {@code inputColumns}, {@code arguments}, the names of {@code outputColumns}, and
 * {@code outerFilePaths}. Each argument becomes one tuple of a schema with a single STRING
 * attribute named "args".
 */
private void sendArgs() {
    final List<String> collected = new ArrayList<>();
    if (inputColumns != null) {
        collected.addAll(inputColumns);
    }
    if (arguments != null) {
        collected.addAll(arguments);
    }
    if (outputColumns != null) {
        for (Attribute column : outputColumns) {
            collected.add(column.getName());
        }
    }
    if (outerFilePaths != null) {
        collected.addAll(outerFilePaths);
    }

    // One single-field tuple per argument, in collection order.
    final Schema schema = new Schema(Collections.singletonList(new Attribute("args", AttributeType.STRING)));
    final Queue<Tuple> rows = new LinkedList<>();
    collected.forEach(value -> rows.add(new Tuple(schema, Collections.singletonList(value))));
    writeArrowStream(flightClient, rows, ArrowUtils.fromTexeraSchema(schema), Channel.ARGS, batchSize);
}
Also used : Attribute(edu.uci.ics.texera.workflow.common.tuple.schema.Attribute) Schema(edu.uci.ics.texera.workflow.common.tuple.schema.Schema) Tuple(edu.uci.ics.texera.workflow.common.tuple.Tuple)

Aggregations

Tuple (edu.uci.ics.texera.workflow.common.tuple.Tuple): 7, Attribute (edu.uci.ics.texera.workflow.common.tuple.schema.Attribute): 3, Schema (edu.uci.ics.texera.workflow.common.tuple.schema.Schema): 3, StringReader (java.io.StringReader): 1, TokenStream (org.apache.lucene.analysis.TokenStream): 1, OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 1