Search in sources:

Example 41 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

The createJob method of the class Groupby.

/**
 * Builds a job that scans the input splits, groups the tuples on field 0 and
 * writes the grouped result to the output splits.
 *
 * @param inSplits   file splits read by the scanner
 * @param outSplits  file splits used to place both the grouper and the writer
 * @param htSize     hash table size handed to the external hash grouper
 * @param fileSize   input size hint handed to the external hash grouper
 * @param frameLimit frame budget handed to the grouper
 * @param frameSize  frame size of the job specification
 * @param alg        grouping algorithm, "hash" or "sort" (case-insensitive)
 * @param outPlain   {@code true} to write '|'-delimited plain text, {@code false} to write frames
 * @return the assembled job specification, or {@code null} when {@code alg} is
 *         {@code null} or names an unsupported algorithm (an error is printed to stderr)
 */
private static JobSpecification createJob(FileSplit[] inSplits, FileSplit[] outSplits, int htSize, long fileSize, int frameLimit, int frameSize, String alg, boolean outPlain) {
    JobSpecification spec = new JobSpecification(frameSize);
    IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
    FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
            new DelimitedDataTupleParserFactory(lineitemParserFactories, '|'), lineitemDesc);
    createPartitionConstraint(spec, fileScanner, inSplits);

    // Output: two integer fields (the integer grouping key and its aggregated count).
    RecordDescriptor outDesc = new RecordDescriptor(new ISerializerDeserializer[] {
            IntegerSerializerDeserializer.INSTANCE,
            IntegerSerializerDeserializer.INSTANCE });

    // Group on the first field produced by the scan.
    int[] keys = new int[] { 0 };

    AbstractOperatorDescriptor grouper;
    // Constant-first comparison so that a null algorithm name is reported as
    // unsupported below instead of raising a NullPointerException here.
    if ("hash".equalsIgnoreCase(alg)) {
        // External hash group-by.
        // NOTE(review): the count aggregator presumably builds partial results and the
        // int-sum aggregator merges them; confirm against ExternalGroupOperatorDescriptor.
        grouper = new ExternalGroupOperatorDescriptor(spec, htSize, fileSize, keys, frameLimit,
                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
                new IntegerNormalizedKeyComputerFactory(),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(false) }),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, false) }),
                outDesc, outDesc,
                new HashSpillableTableFactory(new IBinaryHashFunctionFamily[] { MurmurHash3BinaryHashFunctionFamily.INSTANCE }));
    } else if ("sort".equalsIgnoreCase(alg)) {
        // Sort-based group-by.
        grouper = new SortGroupByOperatorDescriptor(spec, frameLimit, keys, keys,
                new IntegerNormalizedKeyComputerFactory(),
                new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(keys.length, true) }),
                outDesc, outDesc, false);
    } else {
        System.err.println("unknown groupby alg:" + alg);
        return null;
    }
    // Both grouper variants are placed according to the output splits.
    createPartitionConstraint(spec, grouper, outSplits);

    // Connect the scanner to the grouper, hash-partitioning on the grouping key.
    IConnectorDescriptor scanGroupConnDef2 = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys,
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(IntegerPointable.FACTORY) }));
    spec.connect(scanGroupConnDef2, fileScanner, 0, grouper, 0);

    // Writer: plain '|'-delimited text or raw frames, depending on outPlain.
    IFileSplitProvider outSplitProvider = new ConstantFileSplitProvider(outSplits);
    AbstractSingleActivityOperatorDescriptor writer = outPlain
            ? new PlainFileWriterOperatorDescriptor(spec, outSplitProvider, "|")
            : new FrameFileWriterOperatorDescriptor(spec, outSplitProvider);
    createPartitionConstraint(spec, writer, outSplits);

    // Connect the grouper to the writer one-to-one and mark the writer as the job root.
    IConnectorDescriptor groupOutConn = new OneToOneConnectorDescriptor(spec);
    spec.connect(groupOutConn, grouper, 0, writer, 0);
    spec.addRoot(writer);
    return spec;
}
Also used : IFileSplitProvider(org.apache.hyracks.dataflow.std.file.IFileSplitProvider) RecordDescriptor(org.apache.hyracks.api.dataflow.value.RecordDescriptor) HashSpillableTableFactory(org.apache.hyracks.dataflow.std.group.HashSpillableTableFactory) CountFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory) OneToOneConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor) IBinaryHashFunctionFactory(org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory) IntegerNormalizedKeyComputerFactory(org.apache.hyracks.dataflow.common.data.normalizers.IntegerNormalizedKeyComputerFactory) FileScanOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor) JobSpecification(org.apache.hyracks.api.job.JobSpecification) IFieldAggregateDescriptorFactory(org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory) IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) MultiFieldsAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory) AbstractSingleActivityOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor) ExternalGroupOperatorDescriptor(org.apache.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor) ConstantFileSplitProvider(org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) MToNPartitioningConnectorDescriptor(org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor) DelimitedDataTupleParserFactory(org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory) AbstractOperatorDescriptor(org.apache.hyracks.dataflow.std.base.AbstractOperatorDescriptor) FieldHashPartitionComputerFactory(org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory) 
PlainFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor) IntSumFieldAggregatorFactory(org.apache.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory) FrameFileWriterOperatorDescriptor(org.apache.hyracks.dataflow.std.file.FrameFileWriterOperatorDescriptor) SortGroupByOperatorDescriptor(org.apache.hyracks.dataflow.std.group.sort.SortGroupByOperatorDescriptor)

Example 42 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class JobSpecification method toJSON.

/**
 * Serializes this job specification to JSON.
 *
 * The result has an "operators" array, holding each operator's own JSON plus a
 * "partition-constraints" object when user constraints target that operator,
 * and a "connectors" array, holding each connector's JSON plus the ids and ports
 * of the operators recorded for it in {@code connectorOpMap}, when present.
 *
 * @return the JSON object describing the job
 * @throws IOException declared for the nested {@code toJSON} calls
 */
@SuppressWarnings("incomplete-switch")
public ObjectNode toJSON() throws IOException {
    ObjectMapper om = new ObjectMapper();
    ObjectNode jjob = om.createObjectNode();

    ArrayNode jopArray = om.createArrayNode();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> e : opMap.entrySet()) {
        OperatorDescriptorId opId = e.getKey();
        ObjectNode op = e.getValue().toJSON();
        if (!userConstraints.isEmpty()) {
            // Add operator partition constraints to each JSON operator.
            ObjectNode pcObject = om.createObjectNode();
            ObjectNode pleObject = om.createObjectNode();
            for (Constraint constraint : userConstraints) {
                // Only partition count/location constraints are exported; other tags are skipped.
                switch (constraint.getLValue().getTag()) {
                    case PARTITION_COUNT:
                        PartitionCountExpression pce = (PartitionCountExpression) constraint.getLValue();
                        // Compare ids by value: an equal id held in a different instance
                        // must still match this operator.
                        if (opId.equals(pce.getOperatorDescriptorId())) {
                            pcObject.put("count", getConstraintExpressionRValue(constraint));
                        }
                        break;
                    case PARTITION_LOCATION:
                        PartitionLocationExpression ple = (PartitionLocationExpression) constraint.getLValue();
                        if (opId.equals(ple.getOperatorDescriptorId())) {
                            pleObject.put(Integer.toString(ple.getPartition()), getConstraintExpressionRValue(constraint));
                        }
                        break;
                }
            }
            // Locations are nested under the constraints object, keyed by partition number.
            if (pleObject.size() > 0) {
                pcObject.set("location", pleObject);
            }
            if (pcObject.size() > 0) {
                op.set("partition-constraints", pcObject);
            }
        }
        jopArray.add(op);
    }
    jjob.set("operators", jopArray);

    ArrayNode jcArray = om.createArrayNode();
    for (Map.Entry<ConnectorDescriptorId, IConnectorDescriptor> e : connMap.entrySet()) {
        ObjectNode conn = om.createObjectNode();
        Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>> connection = connectorOpMap.get(e.getKey());
        // A connector without a recorded operator pair is emitted without endpoint fields.
        if (connection != null) {
            conn.put("in-operator-id", connection.getLeft().getLeft().getOperatorId().toString());
            conn.put("in-operator-port", connection.getLeft().getRight().intValue());
            conn.put("out-operator-id", connection.getRight().getLeft().getOperatorId().toString());
            conn.put("out-operator-port", connection.getRight().getRight().intValue());
        }
        conn.set("connector", e.getValue().toJSON());
        jcArray.add(conn);
    }
    jjob.set("connectors", jcArray);
    return jjob;
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) Constraint(org.apache.hyracks.api.constraints.Constraint) ConnectorDescriptorId(org.apache.hyracks.api.dataflow.ConnectorDescriptorId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) PartitionLocationExpression(org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression) HashMap(java.util.HashMap) Map(java.util.Map) PartitionCountExpression(org.apache.hyracks.api.constraints.expressions.PartitionCountExpression) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Pair(org.apache.commons.lang3.tuple.Pair)

Example 43 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class JobSpecification method toString.

/**
 * Renders a human-readable dump of the job: one line per operator
 * ("id : operator"), followed by its input and output connectors when it has
 * any, and finally the user constraints.
 */
@Override
public String toString() {
    StringBuilder out = new StringBuilder();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> opEntry : opMap.entrySet()) {
        // Operator header line.
        out.append(opEntry.getKey().getId());
        out.append(" : ");
        out.append(opEntry.getValue().toString());
        out.append("\n");

        // Incoming connectors; the section is omitted when there are none.
        List<IConnectorDescriptor> incoming = opInputMap.get(opEntry.getKey());
        if (!(incoming == null || incoming.isEmpty())) {
            out.append("   Inputs:\n");
            for (IConnectorDescriptor in : incoming) {
                out.append("      ");
                out.append(in.getConnectorId().getId());
                out.append(" : ");
                out.append(in.toString());
                out.append("\n");
            }
        }

        // Outgoing connectors; the section is omitted when there are none.
        List<IConnectorDescriptor> outgoing = opOutputMap.get(opEntry.getKey());
        if (!(outgoing == null || outgoing.isEmpty())) {
            out.append("   Outputs:\n");
            for (IConnectorDescriptor o : outgoing) {
                out.append("      ");
                out.append(o.getConnectorId().getId());
                out.append(" : ");
                out.append(o.toString());
                out.append("\n");
            }
        }
    }
    out.append("\n");
    out.append("Constraints:\n");
    out.append(userConstraints);
    return out.toString();
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) OperatorDescriptorId(org.apache.hyracks.api.dataflow.OperatorDescriptorId) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor) HashMap(java.util.HashMap) Map(java.util.Map)

Example 44 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class JobActivityGraphBuilder method addSourceEdge.

/**
 * Registers the connector feeding the given operator input as an input of
 * {@code task}, and records {@code task} as that connector's consumer.
 *
 * @param operatorInputIndex input index on the operator that owns the task
 * @param task               activity receiving the input
 * @param taskInputIndex     input index on the activity
 */
@Override
public void addSourceEdge(int operatorInputIndex, IActivity task, int taskInputIndex) {
    // Build the trace message only when FINEST logging is enabled.
    if (LOGGER.isLoggable(Level.FINEST)) {
        String from = task.getActivityId() + ":" + operatorInputIndex;
        String to = task.getActivityId() + ":" + taskInputIndex;
        LOGGER.finest("Adding source edge: " + from + " -> " + to);
    }
    // Resolve the operator behind this activity, then the connector on its input.
    final IOperatorDescriptor owner = activityOperatorMap.get(task.getActivityId());
    final IConnectorDescriptor inputConn = jobSpec.getInputConnectorDescriptor(owner, operatorInputIndex);
    insertIntoIndexedMap(jag.getActivityInputMap(), task.getActivityId(), taskInputIndex, inputConn);
    connectorConsumerMap.put(inputConn.getConnectorId(), Pair.of(task, taskInputIndex));
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor)

Example 45 with IConnectorDescriptor

use of org.apache.hyracks.api.dataflow.IConnectorDescriptor in project asterixdb by apache.

the class JobActivityGraphBuilder method addTargetEdge.

/**
 * Registers the connector leaving the given operator output as an output of
 * {@code task}, and records {@code task} as that connector's producer.
 *
 * @param operatorOutputIndex output index on the operator that owns the task
 * @param task                activity producing the output
 * @param taskOutputIndex     output index on the activity
 */
@Override
public void addTargetEdge(int operatorOutputIndex, IActivity task, int taskOutputIndex) {
    // Build the trace message only when FINEST logging is enabled.
    if (LOGGER.isLoggable(Level.FINEST)) {
        String from = task.getActivityId() + ":" + operatorOutputIndex;
        String to = task.getActivityId() + ":" + taskOutputIndex;
        LOGGER.finest("Adding target edge: " + from + " -> " + to);
    }
    // Resolve the operator behind this activity, then the connector on its output.
    final IOperatorDescriptor owner = activityOperatorMap.get(task.getActivityId());
    final IConnectorDescriptor outputConn = jobSpec.getOutputConnectorDescriptor(owner, operatorOutputIndex);
    insertIntoIndexedMap(jag.getActivityOutputMap(), task.getActivityId(), taskOutputIndex, outputConn);
    connectorProducerMap.put(outputConn.getConnectorId(), Pair.of(task, taskOutputIndex));
}
Also used : IConnectorDescriptor(org.apache.hyracks.api.dataflow.IConnectorDescriptor) IOperatorDescriptor(org.apache.hyracks.api.dataflow.IOperatorDescriptor)

Aggregations

IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor)72 JobSpecification (org.apache.hyracks.api.job.JobSpecification)45 RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor)40 OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor)40 FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor)39 UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer)37 Test (org.junit.Test)35 IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)34 FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory)33 IBinaryHashFunctionFactory (org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory)32 MToNPartitioningConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor)31 IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider)27 ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider)25 DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory)24 ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit)22 FileSplit (org.apache.hyracks.api.io.FileSplit)21 MultiFieldsAggregatorFactory (org.apache.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory)20 IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory)19 IFieldAggregateDescriptorFactory (org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory)19 ResultSetId (org.apache.hyracks.api.dataset.ResultSetId)18