use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
the class SortMergeExchangePOperator method createConnectorDescriptor.
@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
int n = sortColumns.length;
int[] sortFields = new int[n];
IBinaryComparatorFactory[] comps = new IBinaryComparatorFactory[n];
IBinaryHashFunctionFactory[] hashFuns = new IBinaryHashFunctionFactory[n];
IVariableTypeEnvironment env = context.getTypeEnvironment(op);
INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
INormalizedKeyComputerFactory nkcf = null;
for (int i = 0; i < n; i++) {
sortFields[i] = opSchema.findVariable(sortColumns[i].getColumn());
Object type = env.getVarType(sortColumns[i].getColumn());
IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
comps[i] = bcfp.getBinaryComparatorFactory(type, sortColumns[i].getOrder() == OrderKind.ASC);
IBinaryHashFunctionFactoryProvider bhffp = context.getBinaryHashFunctionFactoryProvider();
hashFuns[i] = bhffp.getBinaryHashFunctionFactory(type);
if (i == 0 && nkcfProvider != null && type != null) {
nkcf = nkcfProvider.getNormalizedKeyComputerFactory(type, sortColumns[i].getOrder() == OrderKind.ASC);
}
}
ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(sortFields, hashFuns);
IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(spec, tpcf, sortFields, comps, nkcf);
return new Pair<IConnectorDescriptor, TargetConstraint>(conn, TargetConstraint.ONE);
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
the class StartTasksWork method run.
@Override
public void run() {
Task task = null;
try {
NCServiceContext serviceCtx = ncs.getContext();
Joblet joblet = getOrCreateLocalJoblet(deploymentId, jobId, serviceCtx, acgBytes);
final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {
@Override
public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
ActivityCluster ac = acg.getActivityMap().get(aid);
IConnectorDescriptor conn = ac.getActivityOutputMap().get(aid).get(outputIndex);
return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
}
@Override
public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
ActivityCluster ac = acg.getActivityMap().get(aid);
IConnectorDescriptor conn = ac.getActivityInputMap().get(aid).get(inputIndex);
return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
}
};
for (TaskAttemptDescriptor td : taskDescriptors) {
TaskAttemptId taId = td.getTaskAttemptId();
TaskId tid = taId.getTaskId();
ActivityId aid = tid.getActivityId();
ActivityCluster ac = acg.getActivityMap().get(aid);
IActivity han = ac.getActivityMap().get(aid);
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("Initializing " + taId + " -> " + han);
}
final int partition = tid.getPartition();
List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
task = new Task(joblet, taId, han.getClass().getName(), ncs.getExecutor(), ncs, createInputChannels(td, inputs));
IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
List<IPartitionCollector> collectors = new ArrayList<>();
if (inputs != null) {
for (int i = 0; i < inputs.size(); ++i) {
IConnectorDescriptor conn = inputs.get(i);
IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("input: " + i + ": " + conn.getConnectorId());
}
RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
IPartitionCollector collector = createPartitionCollector(td, partition, task, i, conn, recordDesc, cPolicy);
collectors.add(collector);
}
}
List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(aid);
if (outputs != null) {
for (int i = 0; i < outputs.size(); ++i) {
final IConnectorDescriptor conn = outputs.get(i);
RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
IPartitionWriterFactory pwFactory = createPartitionWriterFactory(task, cPolicy, jobId, conn, partition, taId, flags);
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("output: " + i + ": " + conn.getConnectorId());
}
IFrameWriter writer = conn.createPartitioner(task, recordDesc, pwFactory, partition, td.getPartitionCount(), td.getOutputPartitionCounts()[i]);
operator.setOutputFrameWriter(i, writer, recordDesc);
}
}
task.setTaskRuntime(collectors.toArray(new IPartitionCollector[collectors.size()]), operator);
joblet.addTask(task);
task.start();
}
} catch (Exception e) {
LOGGER.log(Level.WARNING, "Failure starting a task", e);
// notify cc of start task failure
List<Exception> exceptions = new ArrayList<>();
ExceptionUtils.setNodeIds(exceptions, ncs.getId());
ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions));
}
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
the class JobBuilder method setPartitionConstraintsBottomup.
private void setPartitionConstraintsBottomup(OperatorDescriptorId opId, Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp, boolean finalPass) {
List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
AlgebricksPartitionConstraint opConstraint = null;
IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
if (opInputs != null) {
for (IConnectorDescriptor conn : opInputs) {
ConnectorDescriptorId cid = conn.getConnectorId();
org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>, org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p = jobSpec.getConnectorOperatorMap().get(cid);
IOperatorDescriptor src = p.getLeft().getLeft();
// Pre-order DFS
setPartitionConstraintsBottomup(src.getOperatorId(), tgtConstraints, opDesc, finalPass);
TargetConstraint constraint = tgtConstraints.get(conn);
if (constraint != null) {
switch(constraint) {
case ONE:
opConstraint = countOneLocation;
break;
case SAME_COUNT:
opConstraint = partitionConstraintMap.get(src);
break;
}
}
}
}
if (partitionConstraintMap.get(opDesc) == null) {
if (finalPass && opConstraint == null && (opInputs == null || opInputs.isEmpty())) {
opConstraint = countOneLocation;
}
if (finalPass && opConstraint == null) {
opConstraint = clusterLocations;
}
// Sets up the location constraint.
if (opConstraint != null) {
partitionConstraintMap.put(opDesc, opConstraint);
AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, opDesc, opConstraint);
}
}
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
the class JobBuilder method setupConnectors.
private Map<IConnectorDescriptor, TargetConstraint> setupConnectors() throws AlgebricksException {
Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = new HashMap<>();
for (ILogicalOperator exchg : connectors.keySet()) {
ILogicalOperator inOp = inEdges.get(exchg).get(0);
ILogicalOperator outOp = outEdges.get(exchg).get(0);
IOperatorDescriptor inOpDesc = findOpDescForAlgebraicOp(inOp);
IOperatorDescriptor outOpDesc = findOpDescForAlgebraicOp(outOp);
Pair<IConnectorDescriptor, TargetConstraint> connPair = connectors.get(exchg);
IConnectorDescriptor conn = connPair.first;
int producerPort = outEdges.get(inOp).indexOf(exchg);
int consumerPort = inEdges.get(outOp).indexOf(exchg);
jobSpec.connect(conn, inOpDesc, producerPort, outOpDesc, consumerPort);
if (connPair.second != null) {
tgtConstraints.put(conn, connPair.second);
}
}
return tgtConstraints;
}
use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.
the class WordCountMain method createJob.
private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, String algo, int htSize, int frameLimit, String format, int frameSize) {
JobSpecification spec = new JobSpecification(frameSize);
IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
RecordDescriptor wordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new WordTupleParserFactory(), wordDesc);
createPartitionConstraint(spec, wordScanner, inSplits);
RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
IOperatorDescriptor gBy;
int[] keys = new int[] { 0 };
if ("hash".equalsIgnoreCase(algo)) {
gBy = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), groupResultDesc, groupResultDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
createPartitionConstraint(spec, gBy, outSplits);
IConnectorDescriptor scanGroupConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(scanGroupConn, wordScanner, 0, gBy, 0);
} else {
IBinaryComparatorFactory[] cfs = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
IOperatorDescriptor sorter = "memsort".equalsIgnoreCase(algo) ? new InMemorySortOperatorDescriptor(spec, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc) : new ExternalSortOperatorDescriptor(spec, frameLimit, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc);
createPartitionConstraint(spec, sorter, outSplits);
IConnectorDescriptor scanSortConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(scanSortConn, wordScanner, 0, sorter, 0);
gBy = new PreclusteredGroupOperatorDescriptor(spec, keys, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), groupResultDesc);
createPartitionConstraint(spec, gBy, outSplits);
OneToOneConnectorDescriptor sortGroupConn = new OneToOneConnectorDescriptor(spec);
spec.connect(sortGroupConn, sorter, 0, gBy, 0);
}
IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
IOperatorDescriptor writer = "text".equalsIgnoreCase(format) ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, ",") : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
createPartitionConstraint(spec, writer, outSplits);
IConnectorDescriptor gbyPrinterConn = new OneToOneConnectorDescriptor(spec);
spec.connect(gbyPrinterConn, gBy, 0, writer, 0);
spec.addRoot(writer);
return spec;
}
Aggregations