use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.
the class PieChartOpFinalExec method processTexeraTuple.
@Override
public scala.collection.Iterator<Tuple> processTexeraTuple(Either<Tuple, InputExhausted> tuple, LinkIdentity input) {
    if (tuple.isLeft()) {
        // accumulate the running sum of the value column (int when no data column is given, double otherwise)
        if (noDataCol) {
            sum += tuple.left().get().getInt(1);
        } else {
            sum += tuple.left().get().getDouble(1);
        }
        tempList.add(tuple.left().get());
        if (resultSchema == null) {
            resultSchema = tuple.left().get().getSchema();
        }
        return JavaConverters.asScalaIterator(Collections.emptyIterator());
    } else {
        // input exhausted: sort all buffered tuples by value in descending order
        tempList.sort((left, right) -> {
            double leftValue;
            double rightValue;
            if (noDataCol) {
                leftValue = left.getInt(1);
                rightValue = right.getInt(1);
            } else {
                leftValue = left.getDouble(1);
                rightValue = right.getDouble(1);
            }
            return Double.compare(rightValue, leftValue);
        });
        // walk the sorted tuples; once the cumulative sum exceeds pruneRatio * sum,
        // stop adding tuples and collapse the remainder into a single "Other" row
        double total = 0.0;
        for (Tuple t : tempList) {
            if (noDataCol) {
                total += t.getInt(1);
            } else {
                total += t.getDouble(1);
            }
            resultList.add(t);
            if (total / sum > pruneRatio) {
                if (noDataCol) {
                    int otherDataField = (int) (sum - total);
                    resultList.add(Tuple.newBuilder(resultSchema).addSequentially(new Object[] { "Other", otherDataField }).build());
                } else {
                    double otherDataField = sum - total;
                    resultList.add(Tuple.newBuilder(resultSchema).addSequentially(new Object[] { "Other", otherDataField }).build());
                }
                break;
            }
        }
        return JavaConverters.asScalaIterator(resultList.iterator());
    }
}
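To isolate the pruning logic, here is a minimal standalone sketch of the same idea, with plain String/Double pairs standing in for Texera Tuples; the names counts and pruneRatio are assumptions of this sketch, mirroring the fields above.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Standalone sketch of the "prune and add Other" step (plain pairs stand in for Texera Tuples).
class PieChartPruneSketch {
    static List<Map.Entry<String, Double>> prune(Map<String, Double> counts, double pruneRatio) {
        double sum = counts.values().stream().mapToDouble(Double::doubleValue).sum();
        // sort entries by value in descending order, mirroring tempList.sort(...) above
        List<Map.Entry<String, Double>> sorted = new ArrayList<>(counts.entrySet());
        sorted.sort((l, r) -> Double.compare(r.getValue(), l.getValue()));
        List<Map.Entry<String, Double>> result = new ArrayList<>();
        double total = 0.0;
        for (Map.Entry<String, Double> e : sorted) {
            total += e.getValue();
            result.add(e);
            if (total / sum > pruneRatio) {
                // collapse the remaining slices into a single "Other" entry
                result.add(Map.entry("Other", sum - total));
                break;
            }
        }
        return result;
    }

    public static void main(String[] args) {
        Map<String, Double> counts = Map.of("a", 50.0, "b", 30.0, "c", 15.0, "d", 5.0);
        System.out.println(prune(counts, 0.9)); // [a=50.0, b=30.0, c=15.0, Other=5.0]
    }
}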
use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.
the class LinearRegressionOpExec method predict.
@Override
public void predict(Tuple[] minibatch) {
    // compute y = w_current * x + b_current for each tuple in the minibatch
    results = new Double[minibatch.length];
    int tIdx = 0;
    for (Tuple t : minibatch) {
        Double x = Double.valueOf(t.getField(xAttr));
        results[tIdx] = (w_current * x) + b_current;
        tIdx++;
    }
}
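The method applies a simple linear model to every tuple in the batch. A minimal standalone sketch of the same computation, where wCurrent and bCurrent are assumed stand-ins for the learned w_current and b_current above:

// Sketch of the per-example prediction y = w * x + b (wCurrent/bCurrent are assumed learned values).
class LinearModelSketch {
    private final double wCurrent;
    private final double bCurrent;

    LinearModelSketch(double wCurrent, double bCurrent) {
        this.wCurrent = wCurrent;
        this.bCurrent = bCurrent;
    }

    // predicts one value per entry in the minibatch
    double[] predict(double[] minibatch) {
        double[] results = new double[minibatch.length];
        for (int i = 0; i < minibatch.length; i++) {
            results[i] = wCurrent * minibatch[i] + bCurrent;
        }
        return results;
    }

    public static void main(String[] args) {
        LinearModelSketch model = new LinearModelSketch(2.0, 1.0);
        System.out.println(java.util.Arrays.toString(model.predict(new double[] { 0.0, 1.5, 3.0 }))); // [1.0, 4.0, 7.0]
    }
}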
use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.
the class WordCloudOpPartialExec method calculateWordCount.
private static List<Tuple> calculateWordCount(List<String> texts, Analyzer luceneAnalyzer) throws Exception {
HashMap<String, Integer> termFreqMap = new HashMap<>();
for (String text : texts) {
TokenStream tokenStream = luceneAnalyzer.tokenStream(null, new StringReader(text));
OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
int charStart = offsetAttribute.startOffset();
int charEnd = offsetAttribute.endOffset();
String termStr = text.substring(charStart, charEnd).toLowerCase();
if (!EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.contains(termStr))
termFreqMap.put(termStr, termFreqMap.get(termStr) == null ? 1 : termFreqMap.get(termStr) + 1);
}
tokenStream.close();
}
List<Tuple> termFreqTuples = new ArrayList<>();
for (Map.Entry<String, Integer> e : termFreqMap.entrySet()) {
termFreqTuples.add(Tuple.newBuilder(partialAggregateSchema).addSequentially(new Object[] { e.getKey(), e.getValue() }).build());
}
return termFreqTuples;
}
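For comparison, a standalone sketch of the same counting loop that reads the token text through Lucene's CharTermAttribute instead of slicing the original string by offsets; the StandardAnalyzer used here is an assumption of the sketch, not the analyzer the project uses.

import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Sketch: count non-stop-word terms across a list of texts using the token text directly.
class WordCountSketch {
    static Map<String, Integer> countTerms(List<String> texts, Analyzer analyzer) throws Exception {
        Map<String, Integer> termFreqMap = new HashMap<>();
        for (String text : texts) {
            TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(text));
            CharTermAttribute termAttr = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                String term = termAttr.toString().toLowerCase();
                if (!EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.contains(term)) {
                    termFreqMap.merge(term, 1, Integer::sum);
                }
            }
            tokenStream.close();
        }
        return termFreqMap;
    }

    public static void main(String[] args) throws Exception {
        System.out.println(countTerms(List.of("the quick brown fox", "the lazy dog"), new StandardAnalyzer()));
    }
}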
use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.
the class PythonUDFOpExec method sendConf.
private void sendConf() {
    Schema confSchema = new Schema(Collections.singletonList(new Attribute("conf", AttributeType.STRING)));
    Queue<Tuple> confTuples = new LinkedList<>();
    // TODO: add configurations to be sent
    writeArrowStream(flightClient, confTuples, ArrowUtils.fromTexeraSchema(confSchema), Channel.CONF, batchSize);
}
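The queue is streamed empty because of the TODO above. Purely as a hypothetical sketch of what filling it in might look like, a configuration string could be wrapped into a single-column tuple the same way sendArgs below does; the "batchSize=" payload and the method name are assumptions of this sketch, not the project's behavior.

// Hypothetical sketch only, not the project's code: enqueue one configuration string
// as a single-column STRING tuple, mirroring the pattern used in sendArgs below.
private void sendConfSketch() {
    Schema confSchema = new Schema(Collections.singletonList(new Attribute("conf", AttributeType.STRING)));
    Queue<Tuple> confTuples = new LinkedList<>();
    confTuples.add(new Tuple(confSchema, Collections.singletonList("batchSize=" + batchSize)));
    writeArrowStream(flightClient, confTuples, ArrowUtils.fromTexeraSchema(confSchema), Channel.CONF, batchSize);
}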
use of edu.uci.ics.texera.workflow.common.tuple.Tuple in project textdb by TextDB.
the class PythonUDFOpExec method sendArgs.
private void sendArgs() {
    // Send user args to Server.
    List<String> userArgs = new ArrayList<>();
    if (inputColumns != null)
        userArgs.addAll(inputColumns);
    if (arguments != null)
        userArgs.addAll(arguments);
    if (outputColumns != null) {
        for (Attribute a : outputColumns) userArgs.add(a.getName());
    }
    if (outerFilePaths != null)
        userArgs.addAll(outerFilePaths);
    // wrap each argument string into a single-column tuple and stream the batch over Arrow Flight
    Schema argsSchema = new Schema(Collections.singletonList(new Attribute("args", AttributeType.STRING)));
    Queue<Tuple> argsTuples = new LinkedList<>();
    for (String arg : userArgs) {
        argsTuples.add(new Tuple(argsSchema, Collections.singletonList(arg)));
    }
    writeArrowStream(flightClient, argsTuples, ArrowUtils.fromTexeraSchema(argsSchema), Channel.ARGS, batchSize);
}
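For clarity, a standalone sketch of the flattening step above: the operator's parameters are concatenated, in a fixed order, into one flat list of strings before each entry is wrapped into a single-column tuple. The field names here mirror the ones above and are assumptions of this sketch.

import java.util.ArrayList;
import java.util.List;

// Sketch of the argument-flattening step; order matters, since the receiver reads the strings sequentially.
class UserArgsSketch {
    static List<String> flattenArgs(List<String> inputColumns, List<String> arguments,
                                    List<String> outputColumnNames, List<String> outerFilePaths) {
        List<String> userArgs = new ArrayList<>();
        if (inputColumns != null) userArgs.addAll(inputColumns);
        if (arguments != null) userArgs.addAll(arguments);
        if (outputColumnNames != null) userArgs.addAll(outputColumnNames);
        if (outerFilePaths != null) userArgs.addAll(outerFilePaths);
        return userArgs;
    }

    public static void main(String[] args) {
        System.out.println(flattenArgs(List.of("text"), List.of("--topk", "10"), null, null));
        // [text, --topk, 10]
    }
}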