Use of edu.uci.ics.texera.api.field.ListField in the textdb project by TextDB.
Method mergeTuples of the class SimilarityJoinPredicate.
/**
 * Builds one output tuple from an inner and an outer tuple, producing one field
 * per attribute name of {@code outputSchema}, in schema order.
 *
 * <ul>
 *   <li>{@code SchemaConstants._ID}: a freshly generated random UUID.</li>
 *   <li>{@code SchemaConstants.SPAN_LIST}: a list field wrapping {@code mergeSpanList}.</li>
 *   <li>{@code SchemaConstants.PAYLOAD}: the inner payload spans followed by the outer
 *       payload spans, each passed through {@code addFieldPrefix} with the
 *       corresponding prefix.</li>
 *   <li>Names starting with {@code INNER_PREFIX} / {@code OUTER_PREFIX}: the field of the
 *       inner / outer tuple whose name is the attribute name with the prefix removed.</li>
 * </ul>
 *
 * @param innerTuple    tuple supplying the {@code INNER_PREFIX}-ed fields and inner payload
 * @param outerTuple    tuple supplying the {@code OUTER_PREFIX}-ed fields and outer payload
 * @param outputSchema  schema of the tuple to build
 * @param mergeSpanList spans stored in the output tuple's span-list field
 * @return a new tuple constructed from {@code outputSchema} and the collected fields
 */
private Tuple mergeTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema, List<Span> mergeSpanList) {
    List<IField> resultFields = new ArrayList<>();
    for (String attrName : outputSchema.getAttributeNames()) {
        if (attrName.equals(SchemaConstants._ID)) {
            // generate a new _ID field for this tuple
            resultFields.add(new IDField(UUID.randomUUID().toString()));
        } else if (attrName.equals(SchemaConstants.SPAN_LIST)) {
            // use the generated spanList
            resultFields.add(new ListField<Span>(mergeSpanList));
        } else if (attrName.equals(SchemaConstants.PAYLOAD)) {
            // put the payload of two tuples together: inner spans first, then outer spans
            ListField<Span> innerPayloadField = innerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> innerPayload = innerPayloadField.getValue();
            ListField<Span> outerPayloadField = outerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> outerPayload = outerPayloadField.getValue();

            List<Span> resultPayload = new ArrayList<>();
            for (Span span : innerPayload) {
                resultPayload.add(addFieldPrefix(span, INNER_PREFIX));
            }
            // Use the shared constant (not a literal) so the payload prefix always
            // matches the prefix stripped from attribute names below.
            for (Span span : outerPayload) {
                resultPayload.add(addFieldPrefix(span, OUTER_PREFIX));
            }
            resultFields.add(new ListField<Span>(resultPayload));
        } else if (attrName.startsWith(INNER_PREFIX)) {
            // add other fields from the inner tuple, looked up by their un-prefixed name
            resultFields.add(innerTuple.getField(attrName.substring(INNER_PREFIX.length())));
        } else if (attrName.startsWith(OUTER_PREFIX)) {
            // add other fields from the outer tuple, looked up by their un-prefixed name
            resultFields.add(outerTuple.getField(attrName.substring(OUTER_PREFIX.length())));
        }
        // NOTE(review): an attribute matching none of the cases above adds no field,
        // so resultFields would be shorter than the schema — confirm the output
        // schema can never contain such an attribute.
    }
    return new Tuple(outputSchema, resultFields);
}
Use of edu.uci.ics.texera.api.field.ListField in the textdb project by TextDB.
Method wordCount of the class WordCloudSink.
/**
 * Drains {@code inputOperator} and counts how often each lower-cased payload span
 * value occurs for the attribute named by {@code predicate.getAttribute()}.
 *
 * <p>When {@code addPayload} is set, a payload field generated with the predicate's
 * Lucene analyzer string is added to each tuple before its payload is read. Values
 * contained in {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} are not counted.
 *
 * @return the (word, count) entries sorted by count, highest first
 */
public List<Map.Entry<String, Integer>> wordCount() {
    Map<String, Integer> wordCountMap = new HashMap<>();
    Tuple tuple;
    while ((tuple = inputOperator.getNextTuple()) != null) {
        if (addPayload) {
            tuple = new Tuple.Builder(tuple).add(SchemaConstants.PAYLOAD_ATTRIBUTE, new ListField<Span>(DataflowUtils.generatePayloadFromTuple(tuple, predicate.getLuceneAnalyzerString()))).build();
        }
        // Look the payload up through the shared constant rather than a string literal.
        ListField<Span> payloadField = tuple.getField(SchemaConstants.PAYLOAD);
        List<Span> payloadSpanList = payloadField.getValue();
        for (Span span : payloadSpanList) {
            if (span.getAttributeName().equals(predicate.getAttribute())) {
                // Locale.ROOT keeps the lower-casing independent of the JVM default
                // locale (e.g. Turkish maps "I" to a dotless "ı"), so keys compare
                // reliably against the English stop-word set.
                String key = span.getValue().toLowerCase(java.util.Locale.ROOT);
                if (!StopAnalyzer.ENGLISH_STOP_WORDS_SET.contains(key)) {
                    // insert 1 for a new word, otherwise add 1 to the existing count
                    wordCountMap.merge(key, 1, Integer::sum);
                }
            }
        }
    }
    // descending by count
    return wordCountMap.entrySet().stream()
            .sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue()))
            .collect(Collectors.toList());
}
Aggregations