Example 36 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The class SortMergeExchangePOperator, method createConnectorDescriptor: builds the connector for a sort-merge exchange by collecting sort fields, binary comparators, and hash functions for the sort columns (plus a normalized-key computer for the leading column), and returns an MToNPartitioningMergingConnectorDescriptor together with TargetConstraint.ONE.

@Override
public Pair<IConnectorDescriptor, TargetConstraint> createConnectorDescriptor(IConnectorDescriptorRegistry spec, ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
    int n = sortColumns.length;
    int[] sortFields = new int[n];
    IBinaryComparatorFactory[] comps = new IBinaryComparatorFactory[n];
    IBinaryHashFunctionFactory[] hashFuns = new IBinaryHashFunctionFactory[n];
    IVariableTypeEnvironment env = context.getTypeEnvironment(op);
    INormalizedKeyComputerFactoryProvider nkcfProvider = context.getNormalizedKeyComputerFactoryProvider();
    INormalizedKeyComputerFactory nkcf = null;
    for (int i = 0; i < n; i++) {
        sortFields[i] = opSchema.findVariable(sortColumns[i].getColumn());
        Object type = env.getVarType(sortColumns[i].getColumn());
        IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
        comps[i] = bcfp.getBinaryComparatorFactory(type, sortColumns[i].getOrder() == OrderKind.ASC);
        IBinaryHashFunctionFactoryProvider bhffp = context.getBinaryHashFunctionFactoryProvider();
        hashFuns[i] = bhffp.getBinaryHashFunctionFactory(type);
        if (i == 0 && nkcfProvider != null && type != null) {
            nkcf = nkcfProvider.getNormalizedKeyComputerFactory(type, sortColumns[i].getOrder() == OrderKind.ASC);
        }
    }
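    // Hash-partition tuples on the sort fields; the merging connector below then
    // merge-sorts the pre-sorted streams arriving at each consumer partition.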
    ITuplePartitionComputerFactory tpcf = new FieldHashPartitionComputerFactory(sortFields, hashFuns);
    IConnectorDescriptor conn = new MToNPartitioningMergingConnectorDescriptor(spec, tpcf, sortFields, comps, nkcf);
    return new Pair<IConnectorDescriptor, TargetConstraint>(conn, TargetConstraint.ONE);
}
Also used:
ITuplePartitionComputerFactory (org.apache.hyracks.api.dataflow.value.ITuplePartitionComputerFactory)
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
IBinaryHashFunctionFactoryProvider (org.apache.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider)
MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)
IBinaryComparatorFactoryProvider (org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider)
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)
TargetConstraint (org.apache.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder.TargetConstraint)
INormalizedKeyComputerFactory (org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory)
INormalizedKeyComputerFactoryProvider (org.apache.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider)
IVariableTypeEnvironment (org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment)
Pair (org.apache.hyracks.algebricks.common.utils.Pair)
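
How the returned pair is consumed is shown by JobBuilder in Examples 38 and 39 below: the connector is wired into the job specification and the TargetConstraint steers partition-constraint propagation. A minimal sketch of that consumption, where sortMergeExchange, producerOp, and consumerOp are hypothetical names for the physical operator and the operator descriptors compiled for its neighbors:

Pair<IConnectorDescriptor, TargetConstraint> connPair =
        sortMergeExchange.createConnectorDescriptor(jobSpec, op, opSchema, context);
// Wire producer output port 0 to consumer input port 0 through the connector.
jobSpec.connect(connPair.first, producerOp, 0, consumerOp, 0);
// TargetConstraint.ONE later pins the consumer to a single location
// (see setPartitionConstraintsBottomup in Example 38).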

Example 37 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The class StartTasksWork, method run: on a node controller, instantiates each task described by the received TaskAttemptDescriptors, resolving the activity's input and output connectors, creating partition collectors for inputs and partition writers for outputs according to the connector policies, and starting the task.

@Override
public void run() {
    Task task = null;
    try {
        NCServiceContext serviceCtx = ncs.getContext();
        Joblet joblet = getOrCreateLocalJoblet(deploymentId, jobId, serviceCtx, acgBytes);
        final ActivityClusterGraph acg = joblet.getActivityClusterGraph();
        IRecordDescriptorProvider rdp = new IRecordDescriptorProvider() {

            @Override
            public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityOutputMap().get(aid).get(outputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }

            @Override
            public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
                ActivityCluster ac = acg.getActivityMap().get(aid);
                IConnectorDescriptor conn = ac.getActivityInputMap().get(aid).get(inputIndex);
                return ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
            }
        };
        for (TaskAttemptDescriptor td : taskDescriptors) {
            TaskAttemptId taId = td.getTaskAttemptId();
            TaskId tid = taId.getTaskId();
            ActivityId aid = tid.getActivityId();
            ActivityCluster ac = acg.getActivityMap().get(aid);
            IActivity han = ac.getActivityMap().get(aid);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info("Initializing " + taId + " -> " + han);
            }
            final int partition = tid.getPartition();
            List<IConnectorDescriptor> inputs = ac.getActivityInputMap().get(aid);
            task = new Task(joblet, taId, han.getClass().getName(), ncs.getExecutor(), ncs, createInputChannels(td, inputs));
            IOperatorNodePushable operator = han.createPushRuntime(task, rdp, partition, td.getPartitionCount());
            List<IPartitionCollector> collectors = new ArrayList<>();
            if (inputs != null) {
                for (int i = 0; i < inputs.size(); ++i) {
                    IConnectorDescriptor conn = inputs.get(i);
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("input: " + i + ": " + conn.getConnectorId());
                    }
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IPartitionCollector collector = createPartitionCollector(td, partition, task, i, conn, recordDesc, cPolicy);
                    collectors.add(collector);
                }
            }
            List<IConnectorDescriptor> outputs = ac.getActivityOutputMap().get(aid);
            if (outputs != null) {
                for (int i = 0; i < outputs.size(); ++i) {
                    final IConnectorDescriptor conn = outputs.get(i);
                    RecordDescriptor recordDesc = ac.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
                    IConnectorPolicy cPolicy = connectorPoliciesMap.get(conn.getConnectorId());
                    IPartitionWriterFactory pwFactory = createPartitionWriterFactory(task, cPolicy, jobId, conn, partition, taId, flags);
                    if (LOGGER.isLoggable(Level.INFO)) {
                        LOGGER.info("output: " + i + ": " + conn.getConnectorId());
                    }
                    IFrameWriter writer = conn.createPartitioner(task, recordDesc, pwFactory, partition, td.getPartitionCount(), td.getOutputPartitionCounts()[i]);
                    operator.setOutputFrameWriter(i, writer, recordDesc);
                }
            }
            task.setTaskRuntime(collectors.toArray(new IPartitionCollector[collectors.size()]), operator);
            joblet.addTask(task);
            task.start();
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Failure starting a task", e);
        // notify cc of start task failure
        List<Exception> exceptions = new ArrayList<>();
        exceptions.add(e);
        ExceptionUtils.setNodeIds(exceptions, ncs.getId());
        ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, task, exceptions));
    }
}
Also used:
IFrameWriter (org.apache.hyracks.api.comm.IFrameWriter)
Task (org.apache.hyracks.control.nc.Task)
TaskId (org.apache.hyracks.api.dataflow.TaskId)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
ActivityId (org.apache.hyracks.api.dataflow.ActivityId)
ArrayList (java.util.ArrayList)
IRecordDescriptorProvider (org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider)
Joblet (org.apache.hyracks.control.nc.Joblet)
IActivity (org.apache.hyracks.api.dataflow.IActivity)
NCServiceContext (org.apache.hyracks.control.nc.application.NCServiceContext)
INCServiceContext (org.apache.hyracks.api.application.INCServiceContext)
List (java.util.List)
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
IPartitionCollector (org.apache.hyracks.api.comm.IPartitionCollector)
TaskAttemptId (org.apache.hyracks.api.dataflow.TaskAttemptId)
IConnectorPolicy (org.apache.hyracks.api.dataflow.connectors.IConnectorPolicy)
IPartitionWriterFactory (org.apache.hyracks.api.comm.IPartitionWriterFactory)
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)
UnknownHostException (java.net.UnknownHostException)
HyracksException (org.apache.hyracks.api.exceptions.HyracksException)
ActivityCluster (org.apache.hyracks.api.job.ActivityCluster)
TaskAttemptDescriptor (org.apache.hyracks.control.common.job.TaskAttemptDescriptor)
ActivityClusterGraph (org.apache.hyracks.api.job.ActivityClusterGraph)
IOperatorNodePushable (org.apache.hyracks.api.dataflow.IOperatorNodePushable)

Example 38 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The class JobBuilder, method setPartitionConstraintsBottomup: recursively walks from an operator toward its sources (pre-order DFS) and derives its partition constraint from the TargetConstraints on its input connectors; ONE pins the operator to a single location, SAME_COUNT copies the source operator's constraint, and the final pass applies the countOneLocation or clusterLocations defaults.

private void setPartitionConstraintsBottomup(OperatorDescriptorId opId, Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp, boolean finalPass) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint = null;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>,
                    org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p =
                            jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            // Pre-order DFS
            setPartitionConstraintsBottomup(src.getOperatorId(), tgtConstraints, opDesc, finalPass);
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint != null) {
                switch(constraint) {
                    case ONE:
                        opConstraint = countOneLocation;
                        break;
                    case SAME_COUNT:
                        opConstraint = partitionConstraintMap.get(src);
                        break;
                }
            }
        }
    }
    if (partitionConstraintMap.get(opDesc) == null) {
        if (finalPass && opConstraint == null && (opInputs == null || opInputs.isEmpty())) {
            opConstraint = countOneLocation;
        }
        if (finalPass && opConstraint == null) {
            opConstraint = clusterLocations;
        }
        // Sets up the location constraint.
        if (opConstraint != null) {
            partitionConstraintMap.put(opDesc, opConstraint);
            AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, opDesc, opConstraint);
        }
    }
}
Also used:
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)
Pair (org.apache.hyracks.algebricks.common.utils.Pair)

Example 39 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The class JobBuilder, method setupConnectors: connects each exchange's producer and consumer operator descriptors through its connector in the job specification, and returns the per-connector TargetConstraints for the constraint-propagation pass.

private Map<IConnectorDescriptor, TargetConstraint> setupConnectors() throws AlgebricksException {
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = new HashMap<>();
    for (ILogicalOperator exchg : connectors.keySet()) {
        ILogicalOperator inOp = inEdges.get(exchg).get(0);
        ILogicalOperator outOp = outEdges.get(exchg).get(0);
        IOperatorDescriptor inOpDesc = findOpDescForAlgebraicOp(inOp);
        IOperatorDescriptor outOpDesc = findOpDescForAlgebraicOp(outOp);
        Pair<IConnectorDescriptor, TargetConstraint> connPair = connectors.get(exchg);
        IConnectorDescriptor conn = connPair.first;
        int producerPort = outEdges.get(inOp).indexOf(exchg);
        int consumerPort = inEdges.get(outOp).indexOf(exchg);
        jobSpec.connect(conn, inOpDesc, producerPort, outOpDesc, consumerPort);
        if (connPair.second != null) {
            tgtConstraints.put(conn, connPair.second);
        }
    }
    return tgtConstraints;
}
Also used:
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
HashMap (java.util.HashMap)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator)
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint)
AlgebricksAbsolutePartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint)
AlgebricksCountPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksCountPartitionConstraint)
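
Together with Example 38, this shows JobBuilder's two-step flow: setupConnectors wires every exchange and collects the per-connector constraints, and the bottom-up pass then propagates them. A minimal sketch of a driver under that assumption (rootOpId is a hypothetical root operator id; the actual calling code is not part of these examples):

Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = setupConnectors();
// Non-final pass: propagate the constraints implied by the connectors.
setPartitionConstraintsBottomup(rootOpId, tgtConstraints, null, false);
// Final pass: still-unconstrained operators fall back to countOneLocation
// (source-less operators) or clusterLocations.
setPartitionConstraintsBottomup(rootOpId, tgtConstraints, null, true);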

Example 40 with IConnectorDescriptor

Use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The class WordCountMain, method createJob: builds a word-count job in which a file scanner feeds either a hash-based external group-by or a sort followed by a preclustered group-by, with M-to-N hash-partitioning connectors between scan and aggregation and a one-to-one connector into the file writer.

private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, String algo, int htSize, int frameLimit, String format, int frameSize) {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
    RecordDescriptor wordDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
    FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider, new WordTupleParserFactory(), wordDesc);
    createPartitionConstraint(spec, wordScanner, inSplits);
    RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
    IOperatorDescriptor gBy;
    int[] keys = new int[] { 0 };
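    // Note: fileSize is a field of WordCountMain declared outside this method, not a parameter of createJob.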
    if ("hash".equalsIgnoreCase(algo)) {
        gBy = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(3, false), new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false), new IntSumFieldAggregatorFactory(2, false), new FloatSumFieldAggregatorFactory(3, false) }), groupResultDesc, groupResultDesc, new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { UTF8StringBinaryHashFunctionFamily.INSTANCE }));
        createPartitionConstraint(spec, gBy, outSplits);
        IConnectorDescriptor scanGroupConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(scanGroupConn, wordScanner, 0, gBy, 0);
    } else {
        IBinaryComparatorFactory[] cfs = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
        IOperatorDescriptor sorter = "memsort".equalsIgnoreCase(algo) ? new InMemorySortOperatorDescriptor(spec, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc) : new ExternalSortOperatorDescriptor(spec, frameLimit, keys, new UTF8StringNormalizedKeyComputerFactory(), cfs, wordDesc);
        createPartitionConstraint(spec, sorter, outSplits);
        IConnectorDescriptor scanSortConn = new MToNPartitioningConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(keys, new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
        spec.connect(scanSortConn, wordScanner, 0, sorter, 0);
        gBy = new PreclusteredGroupOperatorDescriptor(spec, keys, new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), groupResultDesc);
        createPartitionConstraint(spec, gBy, outSplits);
        OneToOneConnectorDescriptor sortGroupConn = new OneToOneConnectorDescriptor(spec);
        spec.connect(sortGroupConn, sorter, 0, gBy, 0);
    }
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
    IOperatorDescriptor writer = "text".equalsIgnoreCase(format)
            ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, ",")
            : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    createPartitionConstraint(spec, writer, outSplits);
    IConnectorDescriptor gbyPrinterConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(gbyPrinterConn, gBy, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used:
WordTupleParserFactory (org.apache.hyracks.examples.text.WordTupleParserFactory)
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)
HashSpillableTableFactory (org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory)
CountFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory)
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)
JobSpecification (org.apache.hyracks.api.job.JobSpecification)
UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory)
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)
MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)
ExternalGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor)
InMemorySortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor)
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor)
FloatSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory)
ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor)
PreclusteredGroupOperatorDescriptor (org.apache.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor)
IntSumFieldAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory)
FrameFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FrameFileWriterOperatorDescriptor)
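
The connector choices above follow the general pattern in these examples: a OneToOneConnectorDescriptor keeps tuples on their current partition, while an MToNPartitioningConnectorDescriptor redistributes them by a key hash before a partitioned group-by. A minimal sketch of the hash-redistribution wiring, where producer and consumer stand in for previously created operator descriptors (hypothetical names):

IConnectorDescriptor shuffle = new MToNPartitioningConnectorDescriptor(spec,
        new FieldHashPartitionComputerFactory(new int[] { 0 },
                new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
// Route each tuple to the consumer partition chosen by hashing field 0.
spec.connect(shuffle, producer, 0, consumer, 0);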

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 72
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 45
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 40
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 40
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 39
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 37
Test (org.junit.Test): 35
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 34
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 33
IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory): 32
MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor): 31
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 27
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 25
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 24
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 22
FileSplit (org.apache.hyracks.api.io.FileSplit): 21
MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory): 20
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 19
IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory): 19
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 18