Search in sources :

Example 1 with Partitioner

use of com.datatorrent.api.Partitioner in project apex-core by apache.

the class PhysicalPlan method getPartitioner.

private Partitioner<Operator> getPartitioner(PMapping currentMapping) {
    Operator operator = currentMapping.logicalOperator.getOperator();
    Partitioner<Operator> partitioner = null;
    if (currentMapping.logicalOperator.getAttributes().contains(OperatorContext.PARTITIONER)) {
        @SuppressWarnings("unchecked") Partitioner<Operator> tmp = (Partitioner<Operator>) currentMapping.logicalOperator.getValue(OperatorContext.PARTITIONER);
        partitioner = tmp;
    } else if (operator instanceof Partitioner) {
        @SuppressWarnings("unchecked") Partitioner<Operator> tmp = (Partitioner<Operator>) operator;
        partitioner = tmp;
    }
    return partitioner;
}
Also used : Operator(com.datatorrent.api.Operator) Partitioner(com.datatorrent.api.Partitioner)

Example 2 with Partitioner

use of com.datatorrent.api.Partitioner in project apex-core by apache.

the class PhysicalPlan method redoPartitions.

private void redoPartitions(PMapping currentMapping, String note) {
    Partitioner<Operator> partitioner = getPartitioner(currentMapping);
    if (partitioner == null) {
        LOG.warn("No partitioner for {}", currentMapping.logicalOperator);
        return;
    }
    RepartitionContext mainPC = new RepartitionContext(partitioner, currentMapping, 0);
    if (mainPC.newPartitions.isEmpty()) {
        LOG.warn("Empty partition list after repartition: {}", currentMapping.logicalOperator);
        return;
    }
    int memoryPerPartition = currentMapping.logicalOperator.getValue(OperatorContext.MEMORY_MB);
    for (Map.Entry<OutputPortMeta, StreamMeta> stream : currentMapping.logicalOperator.getOutputStreams().entrySet()) {
        if (stream.getValue().getLocality() != Locality.THREAD_LOCAL && stream.getValue().getLocality() != Locality.CONTAINER_LOCAL) {
            memoryPerPartition += stream.getKey().getValue(PortContext.BUFFER_MEMORY_MB);
        }
    }
    for (OperatorMeta pp : currentMapping.parallelPartitions) {
        for (Map.Entry<OutputPortMeta, StreamMeta> stream : pp.getOutputStreams().entrySet()) {
            if (stream.getValue().getLocality() != Locality.THREAD_LOCAL && stream.getValue().getLocality() != Locality.CONTAINER_LOCAL) {
                memoryPerPartition += stream.getKey().getValue(PortContext.BUFFER_MEMORY_MB);
            }
        }
        memoryPerPartition += pp.getValue(OperatorContext.MEMORY_MB);
    }
    int requiredMemoryMB = (mainPC.newPartitions.size() - mainPC.currentPartitions.size()) * memoryPerPartition;
    if (requiredMemoryMB > availableMemoryMB) {
        LOG.warn("Insufficient headroom for repartitioning: available {}m required {}m", availableMemoryMB, requiredMemoryMB);
        return;
    }
    List<Partition<Operator>> addedPartitions = new ArrayList<>();
    // determine modifications of partition set, identify affected operator instance(s)
    for (Partition<Operator> newPartition : mainPC.newPartitions) {
        PTOperator op = mainPC.currentPartitionMap.remove(newPartition);
        if (op == null) {
            addedPartitions.add(newPartition);
        } else {
            // check whether mapping was changed
            for (DefaultPartition<Operator> pi : mainPC.currentPartitions) {
                if (pi == newPartition && pi.isModified()) {
                    // existing partition changed (operator or partition keys)
                    // remove/add to update subscribers and state
                    mainPC.currentPartitionMap.put(newPartition, op);
                    addedPartitions.add(newPartition);
                }
            }
        }
    }
    // remaining entries represent deprecated partitions
    this.undeployOpers.addAll(mainPC.currentPartitionMap.values());
    // downstream dependencies require redeploy, resolve prior to modifying plan
    Set<PTOperator> deps = this.getDependents(mainPC.currentPartitionMap.values());
    this.undeployOpers.addAll(deps);
    // dependencies need redeploy, except operators excluded in remove
    this.deployOpers.addAll(deps);
    // process parallel partitions before removing operators from the plan
    LinkedHashMap<PMapping, RepartitionContext> partitionContexts = Maps.newLinkedHashMap();
    Stack<OperatorMeta> parallelPartitions = new Stack<>();
    parallelPartitions.addAll(currentMapping.parallelPartitions);
    pendingLoop: while (!parallelPartitions.isEmpty()) {
        OperatorMeta ppMeta = parallelPartitions.pop();
        for (StreamMeta s : ppMeta.getInputStreams().values()) {
            if (currentMapping.parallelPartitions.contains(s.getSource().getOperatorMeta()) && parallelPartitions.contains(s.getSource().getOperatorMeta())) {
                parallelPartitions.push(ppMeta);
                parallelPartitions.remove(s.getSource().getOperatorMeta());
                parallelPartitions.push(s.getSource().getOperatorMeta());
                continue pendingLoop;
            }
        }
        LOG.debug("Processing parallel partition {}", ppMeta);
        PMapping ppm = this.logicalToPTOperator.get(ppMeta);
        Partitioner<Operator> ppp = getPartitioner(ppm);
        if (ppp == null) {
            partitionContexts.put(ppm, null);
        } else {
            RepartitionContext pc = new RepartitionContext(ppp, ppm, mainPC.newPartitions.size());
            if (pc.newPartitions == null) {
                throw new IllegalStateException("Partitioner returns null for parallel partition " + ppm.logicalOperator);
            }
            partitionContexts.put(ppm, pc);
        }
    }
    // plan updates start here, after all changes were identified
    // remove obsolete operators first, any freed resources
    // can subsequently be used for new/modified partitions
    List<PTOperator> copyPartitions = Lists.newArrayList(currentMapping.partitions);
    // remove deprecated partitions from plan
    for (PTOperator p : mainPC.currentPartitionMap.values()) {
        copyPartitions.remove(p);
        removePartition(p, currentMapping);
        mainPC.operatorIdToPartition.remove(p.getId());
    }
    currentMapping.partitions = copyPartitions;
    // add new operators
    for (Partition<Operator> newPartition : addedPartitions) {
        PTOperator p = addPTOperator(currentMapping, newPartition, mainPC.minCheckpoint);
        mainPC.operatorIdToPartition.put(p.getId(), newPartition);
    }
    // process parallel partition changes
    for (Map.Entry<PMapping, RepartitionContext> e : partitionContexts.entrySet()) {
        if (e.getValue() == null) {
            // no partitioner, add required operators
            for (int i = 0; i < addedPartitions.size(); i++) {
                LOG.debug("Automatically adding to parallel partition {}", e.getKey());
                // set activation windowId to confirm to upstream checkpoints
                addPTOperator(e.getKey(), null, mainPC.minCheckpoint);
            }
        } else {
            RepartitionContext pc = e.getValue();
            // track previous parallel partition mapping
            Map<Partition<Operator>, Partition<Operator>> prevMapping = Maps.newHashMap();
            for (int i = 0; i < mainPC.currentPartitions.size(); i++) {
                prevMapping.put(pc.currentPartitions.get(i), mainPC.currentPartitions.get(i));
            }
            // determine which new partitions match upstream, remaining to be treated as new operator
            Map<Partition<Operator>, Partition<Operator>> newMapping = Maps.newHashMap();
            Iterator<Partition<Operator>> itMain = mainPC.newPartitions.iterator();
            Iterator<Partition<Operator>> itParallel = pc.newPartitions.iterator();
            while (itMain.hasNext() && itParallel.hasNext()) {
                newMapping.put(itParallel.next(), itMain.next());
            }
            for (Partition<Operator> newPartition : pc.newPartitions) {
                PTOperator op = pc.currentPartitionMap.remove(newPartition);
                if (op == null) {
                    pc.addedPartitions.add(newPartition);
                } else if (prevMapping.get(newPartition) != newMapping.get(newPartition)) {
                    // upstream partitions don't match, remove/add to replace with new operator
                    pc.currentPartitionMap.put(newPartition, op);
                    pc.addedPartitions.add(newPartition);
                } else {
                    // check whether mapping was changed - based on DefaultPartition implementation
                    for (DefaultPartition<Operator> pi : pc.currentPartitions) {
                        if (pi == newPartition && pi.isModified()) {
                            // existing partition changed (operator or partition keys)
                            // remove/add to update subscribers and state
                            mainPC.currentPartitionMap.put(newPartition, op);
                            pc.addedPartitions.add(newPartition);
                        }
                    }
                }
            }
            if (!pc.currentPartitionMap.isEmpty()) {
                // remove obsolete partitions
                List<PTOperator> cowPartitions = Lists.newArrayList(e.getKey().partitions);
                for (PTOperator p : pc.currentPartitionMap.values()) {
                    cowPartitions.remove(p);
                    removePartition(p, e.getKey());
                    pc.operatorIdToPartition.remove(p.getId());
                }
                e.getKey().partitions = cowPartitions;
            }
            // add new partitions
            for (Partition<Operator> newPartition : pc.addedPartitions) {
                PTOperator oper = addPTOperator(e.getKey(), newPartition, mainPC.minCheckpoint);
                pc.operatorIdToPartition.put(oper.getId(), newPartition);
            }
            getPartitioner(e.getKey()).partitioned(pc.operatorIdToPartition);
        }
    }
    updateStreamMappings(currentMapping);
    for (PMapping pp : partitionContexts.keySet()) {
        updateStreamMappings(pp);
    }
    deployChanges();
    if (mainPC.currentPartitions.size() != mainPC.newPartitions.size()) {
        StramEvent ev = new StramEvent.PartitionEvent(currentMapping.logicalOperator.getName(), mainPC.currentPartitions.size(), mainPC.newPartitions.size());
        ev.setReason(note);
        this.ctx.recordEventAsync(ev);
    }
    partitioner.partitioned(mainPC.operatorIdToPartition);
}
Also used : Operator(com.datatorrent.api.Operator) StramEvent(com.datatorrent.stram.api.StramEvent) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) StreamMeta(com.datatorrent.stram.plan.logical.LogicalPlan.StreamMeta) OutputPortMeta(com.datatorrent.stram.plan.logical.LogicalPlan.OutputPortMeta) Partitioner(com.datatorrent.api.Partitioner) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) OperatorMeta(com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta) Checkpoint(com.datatorrent.stram.api.Checkpoint) Stack(java.util.Stack) DefaultPartition(com.datatorrent.api.DefaultPartition) Map(java.util.Map) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) LinkedHashMap(java.util.LinkedHashMap)

Example 3 with Partitioner

use of com.datatorrent.api.Partitioner in project apex-core by apache.

the class PhysicalPlan method initPartitioning.

private void initPartitioning(PMapping m, int partitionCnt) {
    Operator operator = m.logicalOperator.getOperator();
    Collection<Partition<Operator>> partitions;
    @SuppressWarnings("unchecked") Partitioner<Operator> partitioner = m.logicalOperator.getAttributes().contains(OperatorContext.PARTITIONER) ? (Partitioner<Operator>) m.logicalOperator.getValue(OperatorContext.PARTITIONER) : operator instanceof Partitioner ? (Partitioner<Operator>) operator : null;
    Collection<Partition<Operator>> collection = new ArrayList<>(1);
    DefaultPartition<Operator> firstPartition = new DefaultPartition<>(operator);
    collection.add(firstPartition);
    if (partitioner != null) {
        partitions = partitioner.definePartitions(collection, new PartitioningContextImpl(m, partitionCnt));
        if (partitions == null || partitions.isEmpty()) {
            throw new IllegalStateException("Partitioner returns null or empty.");
        }
    } else {
        //Non zero in the case of parallel partitioning.
        for (int partitionCounter = 0; partitionCounter < partitionCnt - 1; partitionCounter++) {
            collection.add(firstPartition);
        }
        partitions = collection;
    }
    Collection<StatsListener> statsListeners = m.logicalOperator.getValue(OperatorContext.STATS_LISTENERS);
    if (statsListeners != null && !statsListeners.isEmpty()) {
        if (m.statsHandlers == null) {
            m.statsHandlers = new ArrayList<>(statsListeners.size());
        }
        m.statsHandlers.addAll(statsListeners);
    }
    if (m.logicalOperator.getOperator() instanceof StatsListener) {
        if (m.statsHandlers == null) {
            m.statsHandlers = new ArrayList<>(1);
        }
        m.statsHandlers.add(new StatsListenerProxy(m.logicalOperator));
    }
    // create operator instance per partition
    Map<Integer, Partition<Operator>> operatorIdToPartition = Maps.newHashMapWithExpectedSize(partitions.size());
    for (Partition<Operator> partition : partitions) {
        PTOperator p = addPTOperator(m, partition, null);
        operatorIdToPartition.put(p.getId(), partition);
    }
    if (partitioner != null) {
        partitioner.partitioned(operatorIdToPartition);
    }
}
Also used : Operator(com.datatorrent.api.Operator) Partition(com.datatorrent.api.Partitioner.Partition) DefaultPartition(com.datatorrent.api.DefaultPartition) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) StatsListener(com.datatorrent.api.StatsListener) Checkpoint(com.datatorrent.stram.api.Checkpoint) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DefaultPartition(com.datatorrent.api.DefaultPartition) Partitioner(com.datatorrent.api.Partitioner)

Aggregations

Operator (com.datatorrent.api.Operator)3 Partitioner (com.datatorrent.api.Partitioner)3 DefaultPartition (com.datatorrent.api.DefaultPartition)2 Partition (com.datatorrent.api.Partitioner.Partition)2 Checkpoint (com.datatorrent.stram.api.Checkpoint)2 ArrayList (java.util.ArrayList)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 StatsListener (com.datatorrent.api.StatsListener)1 StramEvent (com.datatorrent.stram.api.StramEvent)1 OperatorMeta (com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta)1 OutputPortMeta (com.datatorrent.stram.plan.logical.LogicalPlan.OutputPortMeta)1 StreamMeta (com.datatorrent.stram.plan.logical.LogicalPlan.StreamMeta)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Stack (java.util.Stack)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1