use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
Example from the class Groupby, method createJob.
/**
 * Builds a Hyracks job that scans the lineitem input splits, groups tuples on the
 * first field with either a hash-based or a sort-based group-by operator, and
 * writes one (key, count) record per distinct key to the output splits.
 *
 * @param inSplits   file splits to scan
 * @param outSplits  file splits to write results to; also used to derive the
 *                   partition constraints of the grouper and the writer
 * @param htSize     hash-table size for the external hash grouper
 * @param fileSize   input-file-size hint passed to the external hash grouper
 * @param frameLimit frame budget for the grouper
 * @param frameSize  frame size used by the whole job
 * @param alg        grouping algorithm, "hash" or "sort" (case-insensitive)
 * @param outPlain   true to write plain-text ("|"-delimited) output, false for frame-format output
 * @return the assembled job specification, or null when {@code alg} is not recognized
 */
private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, int htSize, long fileSize,
        int frameLimit, int frameSize, String alg, boolean outPlain) {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
    FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
            new DelimitedDataTupleParserFactory(lineitemParserFactories, '|'), lineitemDesc);
    createPartitionConstraint(spec, fileScanner, inSplits);
    // Output: each unique string with an integer count
    RecordDescriptor outDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
    // Specify the grouping key, which will be the string extracted during the scan.
    int[] keys = new int[] { 0 };
    AbstractOperatorDescriptor grouper;
    if (alg.equalsIgnoreCase("hash")) {
        // External hash-based group-by: partial aggregate counts, merge with int-sum.
        grouper = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit,
                new IBinaryComparatorFactory[] {
                        PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
                new IntegerNormalizedKeyComputerFactory(),
                new MultiFieldsAggregatorFactory(
                        new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(keys.length, false) }),
                outDesc, outDesc, new HashSpillableTableFactory(
                        new IBinaryHashFunctionFamily[] { MurmurHash3BinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, grouper, outSplits);
    } else if (alg.equalsIgnoreCase("sort")) {
        // Sort-based group-by: same aggregators, true = used in the merge phase.
        grouper = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keys,
                new IntegerNormalizedKeyComputerFactory(),
                new IBinaryComparatorFactory[] {
                        PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
                new MultiFieldsAggregatorFactory(
                        new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(keys.length, true) }),
                outDesc, outDesc, false);
        createPartitionConstraint(spec, grouper, outSplits);
    } else {
        // Fixed typo in the diagnostic: "unknow" -> "unknown".
        System.err.println("unknown groupby alg:" + alg);
        return null;
    }
    // Connect scanner with the grouper: hash-partition tuples on the grouping key.
    IConnectorDescriptor scanGroupConn = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] {
                    PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(scanGroupConn, fileScanner, 0, grouper, 0);
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
    AbstractSingleActivityOperatorDescriptor writer = outPlain
            ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|")
            : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    createPartitionConstraint(spec, writer, outSplits);
    IConnectorDescriptor groupOutConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(groupOutConn, grouper, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
Example from the class JobSpecification, method toJSON.
/**
 * Serializes this job specification to JSON: an "operators" array (each operator
 * annotated with any user-supplied partition-count / partition-location
 * constraints) and a "connectors" array (each connector annotated with its
 * endpoint operator ids and ports, when known).
 *
 * @return a JSON object with "operators" and "connectors" arrays
 * @throws IOException if serializing an operator or connector fails
 */
@SuppressWarnings("incomplete-switch")
public ObjectNode toJSON() throws IOException {
    ObjectMapper om = new ObjectMapper();
    ObjectNode jjob = om.createObjectNode();
    ArrayNode jopArray = om.createArrayNode();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> e : opMap.entrySet()) {
        ObjectNode op = e.getValue().toJSON();
        if (!userConstraints.isEmpty()) {
            // Add operator partition constraints to each JSON operator.
            ObjectNode pcObject = om.createObjectNode();
            ObjectNode pleObject = om.createObjectNode();
            for (Constraint constraint : userConstraints) {
                switch (constraint.getLValue().getTag()) {
                    case PARTITION_COUNT:
                        PartitionCountExpression pce = (PartitionCountExpression) constraint.getLValue();
                        // Compare with equals(), not ==: the map key and the constraint's id
                        // may be distinct but equal instances, and == would drop the constraint.
                        if (e.getKey().equals(pce.getOperatorDescriptorId())) {
                            pcObject.put("count", getConstraintExpressionRValue(constraint));
                        }
                        break;
                    case PARTITION_LOCATION:
                        PartitionLocationExpression ple = (PartitionLocationExpression) constraint.getLValue();
                        if (e.getKey().equals(ple.getOperatorDescriptorId())) {
                            pleObject.put(Integer.toString(ple.getPartition()), getConstraintExpressionRValue(constraint));
                        }
                        break;
                }
            }
            // Only attach the constraint objects when something matched this operator.
            if (pleObject.size() > 0) {
                pcObject.set("location", pleObject);
            }
            if (pcObject.size() > 0) {
                op.set("partition-constraints", pcObject);
            }
        }
        jopArray.add(op);
    }
    jjob.set("operators", jopArray);
    ArrayNode jcArray = om.createArrayNode();
    for (Map.Entry<ConnectorDescriptorId, IConnectorDescriptor> e : connMap.entrySet()) {
        ObjectNode conn = om.createObjectNode();
        Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>> connection = connectorOpMap.get(e.getKey());
        // Endpoint information may be absent for a connector that is not yet wired up.
        if (connection != null) {
            conn.put("in-operator-id", connection.getLeft().getLeft().getOperatorId().toString());
            conn.put("in-operator-port", connection.getLeft().getRight().intValue());
            conn.put("out-operator-id", connection.getRight().getLeft().getOperatorId().toString());
            conn.put("out-operator-port", connection.getRight().getRight().intValue());
        }
        conn.set("connector", e.getValue().toJSON());
        jcArray.add(conn);
    }
    jjob.set("connectors", jcArray);
    return jjob;
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
Example from the class JobSpecification, method toString.
/**
 * Renders a human-readable dump of the job: each operator id with its
 * description, followed by the connectors feeding it ("Inputs") and the
 * connectors it feeds ("Outputs"), and finally the user-supplied constraints.
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> entry : opMap.entrySet()) {
        OperatorDescriptorId opId = entry.getKey();
        sb.append(opId.getId()).append(" : ").append(entry.getValue().toString()).append("\n");
        List<IConnectorDescriptor> inbound = opInputMap.get(opId);
        if (inbound != null && !inbound.isEmpty()) {
            sb.append(" Inputs:\n");
            for (IConnectorDescriptor connector : inbound) {
                sb.append(" ").append(connector.getConnectorId().getId()).append(" : ").append(connector.toString()).append("\n");
            }
        }
        List<IConnectorDescriptor> outbound = opOutputMap.get(opId);
        if (outbound != null && !outbound.isEmpty()) {
            sb.append(" Outputs:\n");
            for (IConnectorDescriptor connector : outbound) {
                sb.append(" ").append(connector.getConnectorId().getId()).append(" : ").append(connector.toString()).append("\n");
            }
        }
    }
    sb.append("\n").append("Constraints:\n").append(userConstraints);
    return sb.toString();
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
Example from the class JobActivityGraphBuilder, method addSourceEdge.
/**
 * Records that input {@code operatorInputIndex} of the activity's owning operator
 * feeds input {@code taskInputIndex} of {@code task}: the corresponding connector
 * is added to the activity-input map and registered as consumed by the task.
 */
@Override
public void addSourceEdge(int operatorInputIndex, IActivity task, int taskInputIndex) {
    // NOTE(review): the message prints task.getActivityId() for both endpoints of
    // the edge — presumably the operator side was intended; confirm before changing.
    if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("Adding source edge: " + task.getActivityId() + ":" + operatorInputIndex + " -> " + task.getActivityId() + ":" + taskInputIndex);
    }
    IOperatorDescriptor owner = activityOperatorMap.get(task.getActivityId());
    IConnectorDescriptor connector = jobSpec.getInputConnectorDescriptor(owner, operatorInputIndex);
    insertIntoIndexedMap(jag.getActivityInputMap(), task.getActivityId(), taskInputIndex, connector);
    connectorConsumerMap.put(connector.getConnectorId(), Pair.of(task, taskInputIndex));
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
Example from the class JobActivityGraphBuilder, method addTargetEdge.
/**
 * Records that output {@code operatorOutputIndex} of the activity's owning operator
 * is produced by output {@code taskOutputIndex} of {@code task}: the corresponding
 * connector is added to the activity-output map and registered as produced by the task.
 */
@Override
public void addTargetEdge(int operatorOutputIndex, IActivity task, int taskOutputIndex) {
    // NOTE(review): the message prints task.getActivityId() for both endpoints of
    // the edge — presumably the operator side was intended; confirm before changing.
    if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("Adding target edge: " + task.getActivityId() + ":" + operatorOutputIndex + " -> " + task.getActivityId() + ":" + taskOutputIndex);
    }
    IOperatorDescriptor owner = activityOperatorMap.get(task.getActivityId());
    IConnectorDescriptor connector = jobSpec.getOutputConnectorDescriptor(owner, operatorOutputIndex);
    insertIntoIndexedMap(jag.getActivityOutputMap(), task.getActivityId(), taskOutputIndex, connector);
    connectorProducerMap.put(connector.getConnectorId(), Pair.of(task, taskOutputIndex));
}
Aggregations