
Example 61 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in the asterixdb project (Apache).

Class Sort, method createJob. The snippet builds a complete sort job: a delimited file scan feeds either a bounded top-K sorter or an external merge sort, and the sorted partitions are merged into a plain-file writer.

private static JobSpecification createJob(FileSplit[] ordersSplits, FileSplit[] outputSplit, String memBufferAlg, int frameLimit, int frameSize, int limit, boolean usingHeapSorter) {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(frameSize);
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider, new DelimitedDataTupleParserFactory(orderParserFactories, '|'), ordersDesc);
    createPartitionConstraint(spec, ordScanner, ordersSplits);
    AbstractSorterOperatorDescriptor sorter;
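    // A finite limit with heap sorting enabled calls for a bounded top-K sort;
    // otherwise fall back to an external merge sort with the requested free-slot policy.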
    if (usingHeapSorter && limit < Integer.MAX_VALUE) {
        sorter = new TopKSorterOperatorDescriptor(spec, frameLimit, limit, SortFields, null, SortFieldsComparatorFactories, ordersDesc);
    } else {
        if (memBufferAlg.equalsIgnoreCase("bestfit")) {
            sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.SMALLEST_FIT, limit);
        } else if (memBufferAlg.equalsIgnoreCase("biggestfit")) {
            sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.BIGGEST_FIT, limit);
        } else {
            sorter = new ExternalSortOperatorDescriptor(spec, frameLimit, SortFields, null, SortFieldsComparatorFactories, ordersDesc, Algorithm.MERGE_SORT, EnumFreeSlotPolicy.LAST_FIT, limit);
        }
    }
    createPartitionConstraint(spec, sorter, ordersSplits);
    IFileSplitProvider outputSplitProvider = new ConstantFileSplitProvider(outputSplit);
    IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outputSplitProvider, "|");
    createPartitionConstraint(spec, printer, outputSplit);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, new FieldHashPartitionComputerFactory(SortFields, orderBinaryHashFunctionFactories), SortFields, SortFieldsComparatorFactories, new UTF8StringNormalizedKeyComputerFactory()), sorter, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
Also used: UTF8StringNormalizedKeyComputerFactory (org.apache.hyracks.dataflow.common.data.normalizers.UTF8StringNormalizedKeyComputerFactory), IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider), MToNPartitioningMergingConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor), ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider), DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory), AbstractSorterOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.AbstractSorterOperatorDescriptor), OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor), FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory), TopKSorterOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.TopKSorterOperatorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor), FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor), ExternalSortOperatorDescriptor (org.apache.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor), JobSpecification (org.apache.hyracks.api.job.JobSpecification)
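
For context, a JobSpecification like this one is executed by submitting it to a Hyracks cluster controller. The sketch below shows the submission side; the helper class name, host, and port are illustrative assumptions, while HyracksConnection, startJob, and waitForCompletion are the standard Hyracks client calls.

import org.apache.hyracks.api.client.HyracksConnection;
import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.job.JobId;
import org.apache.hyracks.api.job.JobSpecification;

final class SortJobRunner {
    // Submit a spec such as the one produced by createJob(...) above.
    static void run(JobSpecification spec, String ccHost, int ccPort) throws Exception {
        IHyracksClientConnection hcc = new HyracksConnection(ccHost, ccPort);
        JobId jobId = hcc.startJob(spec);  // hand the job DAG to the cluster controller
        hcc.waitForCompletion(jobId);      // block until the job finishes or fails
    }
}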

Example 62 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in the asterixdb project (Apache).

Class JobSpecification, method toJSON. Serializes the job's operators (together with any user-supplied partition-count and partition-location constraints) and its connectors into a Jackson ObjectNode.

@SuppressWarnings("incomplete-switch")
public ObjectNode toJSON() throws IOException {
    ObjectMapper om = new ObjectMapper();
    ObjectNode jjob = om.createObjectNode();
    ArrayNode jopArray = om.createArrayNode();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> e : opMap.entrySet()) {
        ObjectNode op = e.getValue().toJSON();
        if (!userConstraints.isEmpty()) {
            // Add operator partition constraints to each JSON operator.
            ObjectNode pcObject = om.createObjectNode();
            ObjectNode pleObject = om.createObjectNode();
            // Walk the user constraints once per operator, matching them by operator id.
            for (Constraint constraint : userConstraints) {
                switch (constraint.getLValue().getTag()) {
                    case PARTITION_COUNT:
                        PartitionCountExpression pce = (PartitionCountExpression) constraint.getLValue();
                        // Compare ids by value; the map key and the constraint's id need not be the same object.
                        if (e.getKey().equals(pce.getOperatorDescriptorId())) {
                            pcObject.put("count", getConstraintExpressionRValue(constraint));
                        }
                        break;
                    case PARTITION_LOCATION:
                        PartitionLocationExpression ple = (PartitionLocationExpression) constraint.getLValue();
                        if (e.getKey().equals(ple.getOperatorDescriptorId())) {
                            pleObject.put(Integer.toString(ple.getPartition()), getConstraintExpressionRValue(constraint));
                        }
                        break;
                }
            }
            if (pleObject.size() > 0) {
                pcObject.set("location", pleObject);
            }
            if (pcObject.size() > 0) {
                op.set("partition-constraints", pcObject);
            }
        }
        jopArray.add(op);
    }
    jjob.set("operators", jopArray);
    ArrayNode jcArray = om.createArrayNode();
    for (Map.Entry<ConnectorDescriptorId, IConnectorDescriptor> e : connMap.entrySet()) {
        ObjectNode conn = om.createObjectNode();
        Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>> connection = connectorOpMap.get(e.getKey());
        if (connection != null) {
            conn.put("in-operator-id", connection.getLeft().getLeft().getOperatorId().toString());
            conn.put("in-operator-port", connection.getLeft().getRight().intValue());
            conn.put("out-operator-id", connection.getRight().getLeft().getOperatorId().toString());
            conn.put("out-operator-port", connection.getRight().getRight().intValue());
        }
        conn.set("connector", e.getValue().toJSON());
        jcArray.add(conn);
    }
    jjob.set("connectors", jcArray);
    return jjob;
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId), ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode), Constraint (org.apache.hyracks.api.constraints.Constraint), ConnectorDescriptorId (org.apache.hyracks.api.dataflow.ConnectorDescriptorId), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode), PartitionLocationExpression (org.apache.hyracks.api.constraints.expressions.PartitionLocationExpression), HashMap (java.util.HashMap), Map (java.util.Map), PartitionCountExpression (org.apache.hyracks.api.constraints.expressions.PartitionCountExpression), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Pair (org.apache.commons.lang3.tuple.Pair)
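
To inspect what toJSON produces, the returned ObjectNode can be pretty-printed with the same Jackson ObjectMapper used above; a minimal sketch (the helper class name is an assumption):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hyracks.api.job.JobSpecification;

final class JobJsonDump {
    // Print the {"operators": [...], "connectors": [...]} document built by toJSON().
    static void dump(JobSpecification spec) throws Exception {
        ObjectMapper om = new ObjectMapper();
        System.out.println(om.writerWithDefaultPrettyPrinter().writeValueAsString(spec.toJSON()));
    }
}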

Example 63 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in the asterixdb project (Apache).

Class JobSpecification, method toString. Renders each operator with its input and output connectors, followed by the user constraints.

@Override
public String toString() {
    StringBuilder buffer = new StringBuilder();
    for (Map.Entry<OperatorDescriptorId, IOperatorDescriptor> e : opMap.entrySet()) {
        buffer.append(e.getKey().getId()).append(" : ").append(e.getValue().toString()).append("\n");
        List<IConnectorDescriptor> inputs = opInputMap.get(e.getKey());
        if (inputs != null && !inputs.isEmpty()) {
            buffer.append("   Inputs:\n");
            for (IConnectorDescriptor c : inputs) {
                buffer.append("      ").append(c.getConnectorId().getId()).append(" : ").append(c.toString()).append("\n");
            }
        }
        List<IConnectorDescriptor> outputs = opOutputMap.get(e.getKey());
        if (outputs != null && !outputs.isEmpty()) {
            buffer.append("   Outputs:\n");
            for (IConnectorDescriptor c : outputs) {
                buffer.append("      ").append(c.getConnectorId().getId()).append(" : ").append(c.toString()).append("\n");
            }
        }
    }
    buffer.append("\n").append("Constraints:\n").append(userConstraints);
    return buffer.toString();
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), OperatorDescriptorId (org.apache.hyracks.api.dataflow.OperatorDescriptorId), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor), HashMap (java.util.HashMap), Map (java.util.Map)

Example 64 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in the asterixdb project (Apache).

Class JobActivityGraphBuilder, method addSourceEdge. Wires an operator's input port to the input port of one of its activities. The method appends into the graph's indexed maps via the private helper insertIntoIndexedMap; a sketch of that helper follows the code.

@Override
public void addSourceEdge(int operatorInputIndex, IActivity task, int taskInputIndex) {
    if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("Adding source edge: " + task.getActivityId() + ":" + operatorInputIndex + " -> " + task.getActivityId() + ":" + taskInputIndex);
    }
    IOperatorDescriptor op = activityOperatorMap.get(task.getActivityId());
    IConnectorDescriptor conn = jobSpec.getInputConnectorDescriptor(op, operatorInputIndex);
    insertIntoIndexedMap(jag.getActivityInputMap(), task.getActivityId(), taskInputIndex, conn);
    connectorConsumerMap.put(conn.getConnectorId(), Pair.of(task, taskInputIndex));
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
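
The snippet calls the private helper insertIntoIndexedMap, whose body is not shown on this page. A plausible sketch, under the assumption that it maintains a map of index-addressed lists (the signature and the null-padding behavior are assumptions, not the verbatim source):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

final class IndexedMaps {
    // Store 'value' at position 'index' of the list kept under 'key',
    // growing the list with nulls so that set(index, ...) is legal.
    static <K, V> void insertIntoIndexedMap(Map<K, List<V>> map, K key, int index, V value) {
        List<V> list = map.computeIfAbsent(key, k -> new ArrayList<>());
        while (list.size() <= index) {
            list.add(null);
        }
        list.set(index, value);
    }
}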

Example 65 with IOperatorDescriptor

Use of org.apache.hyracks.api.dataflow.IOperatorDescriptor in the asterixdb project (Apache).

Class JobActivityGraphBuilder, method addTargetEdge. The mirror of addSourceEdge: wires an operator's output port to an activity's output port. A sketch of the calling side that drives both methods follows the code.

@Override
public void addTargetEdge(int operatorOutputIndex, IActivity task, int taskOutputIndex) {
    if (LOGGER.isLoggable(Level.FINEST)) {
        LOGGER.finest("Adding target edge: " + task.getActivityId() + ":" + operatorOutputIndex + " -> " + task.getActivityId() + ":" + taskOutputIndex);
    }
    IOperatorDescriptor op = activityOperatorMap.get(task.getActivityId());
    IConnectorDescriptor conn = jobSpec.getOutputConnectorDescriptor(op, operatorOutputIndex);
    insertIntoIndexedMap(jag.getActivityOutputMap(), task.getActivityId(), taskOutputIndex, conn);
    connectorProducerMap.put(conn.getConnectorId(), Pair.of(task, taskOutputIndex));
}
Also used: IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor), IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor)
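
As noted above, here is a sketch of the calling side that drives addSourceEdge and addTargetEdge: an operator contributing a single activity to the graph. MyActivity and the surrounding operator class are hypothetical; contributeActivities, addActivity, ActivityId, and getOperatorId are the Hyracks APIs seen in these examples.

// Fragment of a hypothetical single-activity IOperatorDescriptor implementation.
@Override
public void contributeActivities(IActivityGraphBuilder builder) {
    // MyActivity is a stand-in for the operator's IActivity implementation.
    MyActivity activity = new MyActivity(new ActivityId(getOperatorId(), 0));
    builder.addActivity(this, activity);
    builder.addSourceEdge(0, activity, 0);  // operator input 0 -> activity input 0
    builder.addTargetEdge(0, activity, 0);  // activity output 0 -> operator output 0
}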

Aggregations (usage counts)

IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 89
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 61
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 52
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 51
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 48
ConstantFileSplitProvider (org.apache.hyracks.dataflow.std.file.ConstantFileSplitProvider): 48
Test (org.junit.Test): 41
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 37
IConnectorDescriptor (org.apache.hyracks.api.dataflow.IConnectorDescriptor): 34
FileScanOperatorDescriptor (org.apache.hyracks.dataflow.std.file.FileScanOperatorDescriptor): 34
DelimitedDataTupleParserFactory (org.apache.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory): 33
ManagedFileSplit (org.apache.hyracks.api.io.ManagedFileSplit): 30
FileSplit (org.apache.hyracks.api.io.FileSplit): 28
AlgebricksPartitionConstraint (org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint): 26
IValueParserFactory (org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory): 26
ResultSetId (org.apache.hyracks.api.dataset.ResultSetId): 24
ResultWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor): 23
ILogicalOperator (org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator): 19
PlainFileWriterOperatorDescriptor (org.apache.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor): 19
FieldHashPartitionComputerFactory (org.apache.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory): 18