Search in sources :

Example 1 with SocketReaderMeta

use of org.pentaho.di.trans.steps.socketreader.SocketReaderMeta in project pentaho-kettle by pentaho.

the class TransSplitter method splitOriginalTransformation.

public void splitOriginalTransformation() throws KettleException {
    clear();
    // Mixing clusters is not supported at the moment
    // Perform some basic checks on the cluster configuration.
    // 
    findUsedOriginalSteps();
    checkClusterConfiguration();
    generateSlavePartitionSchemas();
    try {
        SlaveServer masterSlaveServer = getMasterServer();
        masterTransMeta = getOriginalCopy(false, null, null);
        ClusterSchema clusterSchema = originalTransformation.findFirstUsedClusterSchema();
        List<SlaveServer> slaveServers = clusterSchema.getSlaveServers();
        int nrSlavesNodes = clusterSchema.findNrSlaves();
        boolean encrypt = false;
        byte[] transformationKey = null;
        PublicKey pubK = null;
        if (encrypt) {
            KeyPair pair = CertificateGenEncryptUtil.generateKeyPair();
            pubK = pair.getPublic();
            PrivateKey privK = pair.getPrivate();
            Key key1 = CertificateGenEncryptUtil.generateSingleKey();
            try {
                transformationKey = CertificateGenEncryptUtil.encodeKeyForTransmission(privK, key1);
            } catch (InvalidKeyException ex) {
                masterTransMeta.getLogChannel().logError("Invalid key was used for encoding", ex);
            } catch (IllegalBlockSizeException ex) {
                masterTransMeta.getLogChannel().logError("Error happenned during key encoding", ex);
            } catch (Exception ex) {
                masterTransMeta.getLogChannel().logError("Error happenned during encryption initialization", ex);
            }
        }
        for (int r = 0; r < referenceSteps.length; r++) {
            StepMeta referenceStep = referenceSteps[r];
            List<StepMeta> prevSteps = originalTransformation.findPreviousSteps(referenceStep);
            int nrPreviousSteps = prevSteps.size();
            for (int p = 0; p < nrPreviousSteps; p++) {
                StepMeta previousStep = prevSteps.get(p);
                if (!referenceStep.isClustered()) {
                    if (!previousStep.isClustered()) {
                        // No clustering involved here: just add the reference step to the master
                        // 
                        StepMeta target = masterTransMeta.findStep(referenceStep.getName());
                        if (target == null) {
                            target = (StepMeta) referenceStep.clone();
                            masterTransMeta.addStep(target);
                        }
                        StepMeta source = masterTransMeta.findStep(previousStep.getName());
                        if (source == null) {
                            source = (StepMeta) previousStep.clone();
                            masterTransMeta.addStep(source);
                        }
                        // Add a hop too...
                        // 
                        TransHopMeta masterHop = new TransHopMeta(source, target);
                        masterTransMeta.addTransHop(masterHop);
                    } else {
                        // reference step is NOT clustered
                        // Previous step is clustered
                        // --> We read from the slave server using socket readers.
                        // We need a reader for each slave server in the cluster
                        // 
                        // Also add the reference step to the master. (cloned)
                        // 
                        StepMeta masterStep = masterTransMeta.findStep(referenceStep.getName());
                        if (masterStep == null) {
                            masterStep = (StepMeta) referenceStep.clone();
                            masterStep.setLocation(masterStep.getLocation().x, masterStep.getLocation().y);
                            masterTransMeta.addStep(masterStep);
                        }
                        Queue<Integer> masterStepCopyNumbers = new LinkedList<Integer>();
                        for (int i = 0; i < masterStep.getCopies(); i++) {
                            masterStepCopyNumbers.add(i);
                        }
                        // 
                        for (int slaveNr = 0; slaveNr < slaveServers.size(); slaveNr++) {
                            SlaveServer sourceSlaveServer = slaveServers.get(slaveNr);
                            if (!sourceSlaveServer.isMaster()) {
                                // MASTER: add remote input steps to the master step. That way it can receive data over sockets.
                                // 
                                // SLAVE : add remote output steps to the previous step
                                // 
                                TransMeta slave = getSlaveTransformation(clusterSchema, sourceSlaveServer);
                                // See if we can add a link to the previous using the Remote Steps concept.
                                // 
                                StepMeta slaveStep = slave.findStep(previousStep.getName());
                                if (slaveStep == null) {
                                    slaveStep = addSlaveCopy(slave, previousStep, sourceSlaveServer);
                                }
                                // Make sure the data finds its way back to the master.
                                // 
                                // Verify the partitioning for this slave step.
                                // It's running in 1 or more copies depending on the number of partitions
                                // Get the number of target partitions...
                                // 
                                StepPartitioningMeta previousStepPartitioningMeta = previousStep.getStepPartitioningMeta();
                                PartitionSchema previousPartitionSchema = previousStepPartitioningMeta.getPartitionSchema();
                                int nrOfSourceCopies = determineNrOfStepCopies(sourceSlaveServer, previousStep);
                                // 
                                if (masterStep.getCopies() != 1 && masterStep.getCopies() != nrOfSourceCopies) {
                                    // this case might be handled correctly later
                                    String message = BaseMessages.getString(PKG, "TransSplitter.Clustering.CopyNumberStep", nrSlavesNodes, previousStep.getName(), masterStep.getName());
                                    throw new KettleException(message);
                                }
                                // 
                                for (int sourceCopyNr = 0; sourceCopyNr < nrOfSourceCopies; sourceCopyNr++) {
                                    // The masterStepCopy number is increasing for each remote copy on each slave.
                                    // This makes the master distribute to each copy of the slave properly.
                                    // There is a check above to make sure that the master has either 1 copy or the same as slave*copies
                                    Integer masterStepCopyNr = masterStepCopyNumbers.poll();
                                    if (masterStepCopyNr == null) {
                                        masterStepCopyNr = 0;
                                    }
                                    // We open a port on the various slave servers...
                                    // So the source is the slave server, the target the master.
                                    // 
                                    int port = getPort(clusterSchema, sourceSlaveServer, slaveStep.getName(), sourceCopyNr, masterSlaveServer, masterStep.getName(), masterStepCopyNr);
                                    RemoteStep remoteMasterStep = new RemoteStep(sourceSlaveServer.getHostname(), masterSlaveServer.getHostname(), Integer.toString(port), slaveStep.getName(), sourceCopyNr, masterStep.getName(), masterStepCopyNr, sourceSlaveServer.getName(), masterSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                    remoteMasterStep.setEncryptingStreams(encrypt);
                                    remoteMasterStep.setKey(transformationKey);
                                    masterStep.getRemoteInputSteps().add(remoteMasterStep);
                                    RemoteStep remoteSlaveStep = new RemoteStep(sourceSlaveServer.getHostname(), masterSlaveServer.getHostname(), Integer.toString(port), slaveStep.getName(), sourceCopyNr, masterStep.getName(), masterStepCopyNr, sourceSlaveServer.getName(), masterSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                    remoteSlaveStep.setEncryptingStreams(encrypt);
                                    remoteSlaveStep.setKey(transformationKey);
                                    slaveStep.getRemoteOutputSteps().add(remoteSlaveStep);
                                    // 
                                    if (slaveStep.isPartitioned()) {
                                        slaveStepCopyPartitionDistribution.addPartition(sourceSlaveServer.getName(), previousPartitionSchema.getName(), sourceCopyNr);
                                    }
                                }
                                // 
                                if (referenceStep.isPartitioned()) {
                                    // Set the target partitioning schema for the source step (master)
                                    // 
                                    StepPartitioningMeta stepPartitioningMeta = previousStepPartitioningMeta.clone();
                                    PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                    partitionSchema.setName(createTargetPartitionSchemaName(partitionSchema.getName()));
                                    if (partitionSchema.isDynamicallyDefined()) {
                                        // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                        // 
                                        partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                    }
                                    masterStep.setTargetStepPartitioningMeta(stepPartitioningMeta);
                                    masterTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                    // Now set the partitioning schema for the slave step...
                                    // For the slave step, we only should those partition IDs that are interesting for the current
                                    // slave...
                                    // 
                                    stepPartitioningMeta = previousStepPartitioningMeta.clone();
                                    partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                    partitionSchema.setName(createSlavePartitionSchemaName(partitionSchema.getName()));
                                    if (partitionSchema.isDynamicallyDefined()) {
                                        // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                        // 
                                        partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                    }
                                    partitionSchema.retainPartitionsForSlaveServer(clusterSchema.findNrSlaves(), getSlaveServerNumber(clusterSchema, sourceSlaveServer));
                                    slave.addOrReplacePartitionSchema(partitionSchema);
                                }
                            }
                        }
                    }
                } else {
                    if (!previousStep.isClustered()) {
                        // reference step is clustered
                        // previous step is not clustered
                        // --> Add a socket writer for each slave server
                        // 
                        // MASTER : add remote output step to the previous step
                        // 
                        StepMeta sourceStep = masterTransMeta.findStep(previousStep.getName());
                        if (sourceStep == null) {
                            sourceStep = (StepMeta) previousStep.clone();
                            sourceStep.setLocation(previousStep.getLocation().x, previousStep.getLocation().y);
                            masterTransMeta.addStep(sourceStep);
                        }
                        Queue<Integer> masterStepCopyNumbers = new LinkedList<Integer>();
                        for (int i = 0; i < sourceStep.getCopies(); i++) {
                            masterStepCopyNumbers.add(i);
                        }
                        for (int s = 0; s < slaveServers.size(); s++) {
                            SlaveServer targetSlaveServer = slaveServers.get(s);
                            if (!targetSlaveServer.isMaster()) {
                                // SLAVE : add remote input step to the reference slave step...
                                // 
                                TransMeta slaveTransMeta = getSlaveTransformation(clusterSchema, targetSlaveServer);
                                // also add the step itself.
                                StepMeta targetStep = slaveTransMeta.findStep(referenceStep.getName());
                                if (targetStep == null) {
                                    targetStep = addSlaveCopy(slaveTransMeta, referenceStep, targetSlaveServer);
                                }
                                // Verify the partitioning for this slave step.
                                // It's running in 1 or more copies depending on the number of partitions
                                // Get the number of target partitions...
                                // 
                                StepPartitioningMeta targetStepPartitioningMeta = referenceStep.getStepPartitioningMeta();
                                PartitionSchema targetPartitionSchema = targetStepPartitioningMeta.getPartitionSchema();
                                int nrOfTargetCopies = determineNrOfStepCopies(targetSlaveServer, referenceStep);
                                // 
                                for (int targetCopyNr = 0; targetCopyNr < nrOfTargetCopies; targetCopyNr++) {
                                    // The masterStepCopy number is increasing for each remote copy on each slave.
                                    // This makes the master distribute to each copy of the slave properly.
                                    // There is a check above to make sure that the master has either 1 copy or the same as slave*copies
                                    Integer masterStepCopyNr = masterStepCopyNumbers.poll();
                                    if (masterStepCopyNr == null) {
                                        masterStepCopyNr = 0;
                                    }
                                    // The master step opens server socket ports
                                    // So the IP address should be the same, in this case, the master...
                                    // 
                                    int port = getPort(clusterSchema, masterSlaveServer, sourceStep.getName(), masterStepCopyNr, targetSlaveServer, referenceStep.getName(), targetCopyNr);
                                    RemoteStep remoteMasterStep = new RemoteStep(masterSlaveServer.getHostname(), targetSlaveServer.getHostname(), Integer.toString(port), sourceStep.getName(), masterStepCopyNr, referenceStep.getName(), targetCopyNr, masterSlaveServer.getName(), targetSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                    remoteMasterStep.setEncryptingStreams(encrypt);
                                    remoteMasterStep.setKey(transformationKey);
                                    sourceStep.getRemoteOutputSteps().add(remoteMasterStep);
                                    RemoteStep remoteSlaveStep = new RemoteStep(masterSlaveServer.getHostname(), targetSlaveServer.getHostname(), Integer.toString(port), sourceStep.getName(), masterStepCopyNr, referenceStep.getName(), targetCopyNr, masterSlaveServer.getName(), targetSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                    remoteSlaveStep.setEncryptingStreams(encrypt);
                                    remoteSlaveStep.setKey(transformationKey);
                                    targetStep.getRemoteInputSteps().add(remoteSlaveStep);
                                    // 
                                    if (targetStep.isPartitioned()) {
                                        slaveStepCopyPartitionDistribution.addPartition(targetSlaveServer.getName(), targetPartitionSchema.getName(), targetCopyNr);
                                    }
                                }
                                // 
                                if (targetStepPartitioningMeta.isPartitioned()) {
                                    // Set the target partitioning schema for the source step (master)
                                    // 
                                    StepPartitioningMeta stepPartitioningMeta = targetStepPartitioningMeta.clone();
                                    PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                    partitionSchema.setName(createTargetPartitionSchemaName(partitionSchema.getName()));
                                    if (partitionSchema.isDynamicallyDefined()) {
                                        // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                        // 
                                        partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                    }
                                    sourceStep.setTargetStepPartitioningMeta(stepPartitioningMeta);
                                    masterTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                    // Now set the partitioning schema for the slave step...
                                    // For the slave step, we only should those partition IDs that are interesting for the current
                                    // slave...
                                    // 
                                    stepPartitioningMeta = targetStepPartitioningMeta.clone();
                                    partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                    partitionSchema.setName(createSlavePartitionSchemaName(partitionSchema.getName()));
                                    if (partitionSchema.isDynamicallyDefined()) {
                                        // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                        // 
                                        partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                    }
                                    partitionSchema.retainPartitionsForSlaveServer(clusterSchema.findNrSlaves(), getSlaveServerNumber(clusterSchema, targetSlaveServer));
                                    slaveTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                }
                            }
                        }
                    } else {
                        // 
                        for (int slaveNr = 0; slaveNr < slaveServers.size(); slaveNr++) {
                            SlaveServer targetSlaveServer = slaveServers.get(slaveNr);
                            if (!targetSlaveServer.isMaster()) {
                                // SLAVE
                                TransMeta slaveTransMeta = getSlaveTransformation(clusterSchema, targetSlaveServer);
                                // This is the target step
                                // 
                                StepMeta targetStep = slaveTransMeta.findStep(referenceStep.getName());
                                if (targetStep == null) {
                                    targetStep = addSlaveCopy(slaveTransMeta, referenceStep, targetSlaveServer);
                                }
                                // This is the source step
                                // 
                                StepMeta sourceStep = slaveTransMeta.findStep(previousStep.getName());
                                if (sourceStep == null) {
                                    sourceStep = addSlaveCopy(slaveTransMeta, previousStep, targetSlaveServer);
                                }
                                // Add a hop between source and target
                                // 
                                TransHopMeta slaveHop = new TransHopMeta(sourceStep, targetStep);
                                slaveTransMeta.addTransHop(slaveHop);
                                // Verify the partitioning
                                // That means is this case that it is possible that
                                // 
                                // 1) the number of partitions is larger than the number of slaves
                                // 2) the partitioning method might change requiring the source step to do re-partitioning.
                                // 
                                // We need to provide the source step with the information to re-partition correctly.
                                // 
                                // Case 1: both source and target are partitioned on the same partition schema.
                                // 
                                StepPartitioningMeta sourceStepPartitioningMeta = previousStep.getStepPartitioningMeta();
                                StepPartitioningMeta targetStepPartitioningMeta = referenceStep.getStepPartitioningMeta();
                                if (previousStep.isPartitioned() && referenceStep.isPartitioned() && sourceStepPartitioningMeta.equals(targetStepPartitioningMeta)) {
                                    // Just divide the partitions over the available slaves...
                                    // set the appropriate partition schema for both step...
                                    // 
                                    StepPartitioningMeta stepPartitioningMeta = sourceStepPartitioningMeta.clone();
                                    PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                    partitionSchema.setName(createSlavePartitionSchemaName(partitionSchema.getName()));
                                    if (partitionSchema.isDynamicallyDefined()) {
                                        partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                    }
                                    partitionSchema.retainPartitionsForSlaveServer(clusterSchema.findNrSlaves(), getSlaveServerNumber(clusterSchema, targetSlaveServer));
                                    sourceStep.setStepPartitioningMeta(stepPartitioningMeta);
                                    targetStep.setStepPartitioningMeta(stepPartitioningMeta);
                                    slaveTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                } else if ((!previousStep.isPartitioned() && referenceStep.isPartitioned()) || (previousStep.isPartitioned() && referenceStep.isPartitioned() && !sourceStepPartitioningMeta.equals(targetStep.getStepPartitioningMeta()))) {
                                    // Case 2: both source and target are partitioned on a different partition schema.
                                    // Case 3: source is not partitioned, target is partitioned.
                                    // 
                                    // --> This means that we're re-partitioning!!
                                    // 
                                    PartitionSchema targetPartitionSchema = targetStepPartitioningMeta.getPartitionSchema();
                                    PartitionSchema sourcePartitionSchema = sourceStepPartitioningMeta.getPartitionSchema();
                                    // 
                                    for (int partSlaveNr = 0; partSlaveNr < slaveServers.size(); partSlaveNr++) {
                                        SlaveServer sourceSlaveServer = slaveServers.get(partSlaveNr);
                                        if (!sourceSlaveServer.isMaster()) {
                                            // It's running in 1 or more copies depending on the number of partitions
                                            // Get the number of target partitions...
                                            // 
                                            Map<PartitionSchema, List<String>> partitionsMap = slaveServerPartitionsMap.get(sourceSlaveServer);
                                            int nrOfTargetPartitions = 1;
                                            if (targetStep.isPartitioned() && targetPartitionSchema != null) {
                                                List<String> targetPartitionsList = partitionsMap.get(targetPartitionSchema);
                                                nrOfTargetPartitions = targetPartitionsList.size();
                                            } else if (targetStep.getCopies() > 1) {
                                                nrOfTargetPartitions = targetStep.getCopies();
                                            }
                                            // Get the number of source partitions...
                                            // 
                                            int nrOfSourcePartitions = 1;
                                            if (sourceStep.isPartitioned() && sourcePartitionSchema != null) {
                                                List<String> sourcePartitionsList = partitionsMap.get(sourcePartitionSchema);
                                                nrOfSourcePartitions = sourcePartitionsList.size();
                                            } else if (sourceStep.getCopies() > 1) {
                                                nrOfSourcePartitions = sourceStep.getCopies();
                                            }
                                            // 
                                            for (int sourceCopyNr = 0; sourceCopyNr < nrOfSourcePartitions; sourceCopyNr++) {
                                                for (int targetCopyNr = 0; targetCopyNr < nrOfTargetPartitions; targetCopyNr++) {
                                                    if (!targetSlaveServer.equals(sourceSlaveServer)) {
                                                        // We hit only get the remote steps, NOT the local ones.
                                                        // That's why it's OK to generate all combinations.
                                                        // 
                                                        int outPort = getPort(clusterSchema, targetSlaveServer, sourceStep.getName(), sourceCopyNr, sourceSlaveServer, targetStep.getName(), targetCopyNr);
                                                        RemoteStep remoteOutputStep = new RemoteStep(targetSlaveServer.getHostname(), sourceSlaveServer.getHostname(), Integer.toString(outPort), sourceStep.getName(), sourceCopyNr, targetStep.getName(), targetCopyNr, targetSlaveServer.getName(), sourceSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                                        remoteOutputStep.setEncryptingStreams(encrypt);
                                                        remoteOutputStep.setKey(transformationKey);
                                                        sourceStep.getRemoteOutputSteps().add(remoteOutputStep);
                                                        // OK, so the source step is sending rows out on the reserved ports
                                                        // What we need to do now is link all the OTHER slaves up to them.
                                                        // 
                                                        int inPort = getPort(clusterSchema, sourceSlaveServer, sourceStep.getName(), sourceCopyNr, targetSlaveServer, targetStep.getName(), targetCopyNr);
                                                        RemoteStep remoteInputStep = new RemoteStep(sourceSlaveServer.getHostname(), targetSlaveServer.getHostname(), Integer.toString(inPort), sourceStep.getName(), sourceCopyNr, targetStep.getName(), targetCopyNr, sourceSlaveServer.getName(), targetSlaveServer.getName(), socketsBufferSize, compressingSocketStreams, originalTransformation.getStepFields(previousStep));
                                                        remoteInputStep.setEncryptingStreams(encrypt);
                                                        remoteInputStep.setKey(transformationKey);
                                                        targetStep.getRemoteInputSteps().add(remoteInputStep);
                                                    }
                                                    // OK, save the partition number for the target step in the partition distribution...
                                                    // 
                                                    slaveStepCopyPartitionDistribution.addPartition(sourceSlaveServer.getName(), targetPartitionSchema.getName(), targetCopyNr);
                                                }
                                            }
                                            if (sourceStepPartitioningMeta.isPartitioned()) {
                                                // Set the correct partitioning schema for the source step.
                                                // 
                                                // Set the target partitioning schema for the target step (slave)
                                                // 
                                                StepPartitioningMeta stepPartitioningMeta = sourceStepPartitioningMeta.clone();
                                                PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                                partitionSchema.setName(createSlavePartitionSchemaName(partitionSchema.getName()));
                                                if (partitionSchema.isDynamicallyDefined()) {
                                                    // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                                    // 
                                                    partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                                }
                                                partitionSchema.retainPartitionsForSlaveServer(clusterSchema.findNrSlaves(), getSlaveServerNumber(clusterSchema, targetSlaveServer));
                                                sourceStep.setStepPartitioningMeta(stepPartitioningMeta);
                                                slaveTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                            }
                                            if (targetStepPartitioningMeta.isPartitioned()) {
                                                // Set the target partitioning schema for the target step (slave)
                                                // 
                                                StepPartitioningMeta stepPartitioningMeta = targetStepPartitioningMeta.clone();
                                                PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                                partitionSchema.setName(createSlavePartitionSchemaName(partitionSchema.getName()));
                                                if (partitionSchema.isDynamicallyDefined()) {
                                                    partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                                }
                                                partitionSchema.retainPartitionsForSlaveServer(clusterSchema.findNrSlaves(), getSlaveServerNumber(clusterSchema, targetSlaveServer));
                                                targetStep.setStepPartitioningMeta(stepPartitioningMeta);
                                                slaveTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                            }
                                            // 
                                            if (!sourceStepPartitioningMeta.isPartitioned() || !sourceStepPartitioningMeta.equals(targetStepPartitioningMeta)) {
                                                // Not partitioned means the target is partitioned.
                                                // Set the target partitioning on the source...
                                                // Set the correct partitioning schema for the source step.
                                                // 
                                                // Set the target partitioning schema for the target step (slave)
                                                // 
                                                StepPartitioningMeta stepPartitioningMeta = targetStepPartitioningMeta.clone();
                                                PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
                                                partitionSchema.setName(createTargetPartitionSchemaName(partitionSchema.getName()));
                                                if (partitionSchema.isDynamicallyDefined()) {
                                                    // Expand the cluster definition to: nrOfSlaves*nrOfPartitionsPerSlave...
                                                    // 
                                                    partitionSchema.expandPartitionsDynamically(clusterSchema.findNrSlaves(), originalTransformation);
                                                }
                                                sourceStep.setTargetStepPartitioningMeta(stepPartitioningMeta);
                                                slaveTransMeta.addOrReplacePartitionSchema(partitionSchema);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            if (nrPreviousSteps == 0) {
                if (!referenceStep.isClustered()) {
                    // Not clustered, simply add the step.
                    if (masterTransMeta.findStep(referenceStep.getName()) == null) {
                        masterTransMeta.addStep((StepMeta) referenceStep.clone());
                    }
                } else {
                    for (int s = 0; s < slaveServers.size(); s++) {
                        SlaveServer slaveServer = slaveServers.get(s);
                        if (!slaveServer.isMaster()) {
                            // SLAVE
                            TransMeta slave = getSlaveTransformation(clusterSchema, slaveServer);
                            if (slave.findStep(referenceStep.getName()) == null) {
                                addSlaveCopy(slave, referenceStep, slaveServer);
                            }
                        }
                    }
                }
            }
        }
        // 
        for (int i = 0; i < referenceSteps.length; i++) {
            StepMeta originalStep = referenceSteps[i];
            // Also take care of the info steps...
            // For example: StreamLookup, Table Input, etc.
            // 
            StepMeta[] infoSteps = originalTransformation.getInfoStep(originalStep);
            for (int p = 0; infoSteps != null && p < infoSteps.length; p++) {
                StepMeta infoStep = infoSteps[p];
                if (infoStep != null) {
                    if (!originalStep.isClustered()) {
                        if (!infoStep.isClustered()) {
                            // No clustering involved here: just add a link between the reference step and the infostep
                            // 
                            StepMeta target = masterTransMeta.findStep(originalStep.getName());
                            StepMeta source = masterTransMeta.findStep(infoStep.getName());
                            // Add a hop too...
                            TransHopMeta masterHop = new TransHopMeta(source, target);
                            masterTransMeta.addTransHop(masterHop);
                        } else {
                            // reference step is NOT clustered
                            // Previous step is clustered
                            // --> We read from the slave server using socket readers.
                            // We need a reader for each slave server in the cluster
                            // On top of that we need to merge the data from all these steps using a dummy. (to make sure)
                            // That dummy needs to feed into Merge Join
                            // 
                            int nrSlaves = clusterSchema.getSlaveServers().size();
                            for (int s = 0; s < nrSlaves; s++) {
                                SlaveServer sourceSlaveServer = clusterSchema.getSlaveServers().get(s);
                                if (!sourceSlaveServer.isMaster()) {
                                    // //////////////////////////////////////////////////////////////////////////////////////////
                                    // On the SLAVES: add a socket writer...
                                    // 
                                    TransMeta slave = getSlaveTransformation(clusterSchema, sourceSlaveServer);
                                    SocketWriterMeta socketWriterMeta = new SocketWriterMeta();
                                    int port = getPort(clusterSchema, sourceSlaveServer, infoStep.getName(), 0, masterSlaveServer, originalStep.getName(), 0);
                                    socketWriterMeta.setPort("" + port);
                                    socketWriterMeta.setBufferSize(clusterSchema.getSocketsBufferSize());
                                    socketWriterMeta.setFlushInterval(clusterSchema.getSocketsFlushInterval());
                                    socketWriterMeta.setCompressed(clusterSchema.isSocketsCompressed());
                                    StepMeta writerStep = new StepMeta(getWriterName(clusterSchema, sourceSlaveServer, infoStep.getName(), 0, masterSlaveServer, originalStep.getName(), 0), socketWriterMeta);
                                    writerStep.setLocation(infoStep.getLocation().x + 50, infoStep.getLocation().y + 50);
                                    writerStep.setDraw(true);
                                    slave.addStep(writerStep);
                                    // We also need to add a hop between infoStep and the new writer step
                                    // 
                                    TransHopMeta slaveHop = new TransHopMeta(infoStep, writerStep);
                                    if (slave.findTransHop(slaveHop) == null) {
                                        slave.addTransHop(slaveHop);
                                    }
                                    // //////////////////////////////////////////////////////////////////////////////////////////
                                    // On the MASTER : add a socket reader and a dummy step to merge the data...
                                    // 
                                    SocketReaderMeta socketReaderMeta = new SocketReaderMeta();
                                    socketReaderMeta.setPort("" + port);
                                    socketReaderMeta.setBufferSize(clusterSchema.getSocketsBufferSize());
                                    socketReaderMeta.setCompressed(clusterSchema.isSocketsCompressed());
                                    StepMeta readerStep = new StepMeta(getReaderName(clusterSchema, sourceSlaveServer, infoStep.getName(), 0, masterSlaveServer, originalStep.getName(), 0), socketReaderMeta);
                                    readerStep.setLocation(infoStep.getLocation().x, infoStep.getLocation().y + (s * FANOUT * 2) - (nrSlaves * FANOUT / 2));
                                    readerStep.setDraw(true);
                                    masterTransMeta.addStep(readerStep);
                                    // Also add a single dummy step in the master that will merge the data from the slave
                                    // transformations.
                                    // 
                                    String dummyName = infoStep.getName();
                                    StepMeta dummyStep = masterTransMeta.findStep(dummyName);
                                    if (dummyStep == null) {
                                        DummyTransMeta dummy = new DummyTransMeta();
                                        dummyStep = new StepMeta(dummyName, dummy);
                                        dummyStep.setLocation(infoStep.getLocation().x + (SPLIT / 2), infoStep.getLocation().y);
                                        dummyStep.setDraw(true);
                                        dummyStep.setDescription("This step merges the data from the various data streams coming " + "from the slave transformations.\nIt does that right before it hits the step that " + "reads from a specific (info) step.");
                                        masterTransMeta.addStep(dummyStep);
                                        // Now we need a hop from the dummy merge step to the actual target step (original step)
                                        // 
                                        StepMeta masterTargetStep = masterTransMeta.findStep(originalStep.getName());
                                        TransHopMeta targetHop = new TransHopMeta(dummyStep, masterTargetStep);
                                        masterTransMeta.addTransHop(targetHop);
                                        // Set the master target step as an info step... (use the cloned copy)
                                        // 
                                        String[] infoStepNames = masterTargetStep.getStepMetaInterface().getStepIOMeta().getInfoStepnames();
                                        if (infoStepNames != null) {
                                            StepMeta[] is = new StepMeta[infoStepNames.length];
                                            for (int n = 0; n < infoStepNames.length; n++) {
                                                // OK, info steps moved to the slave steps
                                                is[n] = slave.findStep(infoStepNames[n]);
                                                if (infoStepNames[n].equals(infoStep.getName())) {
                                                    // We want to replace this one with the reader step: that's where we source from now
                                                    infoSteps[n] = readerStep;
                                                }
                                            }
                                            masterTargetStep.getStepMetaInterface().getStepIOMeta().setInfoSteps(infoSteps);
                                        }
                                    }
                                    // Add a hop between the reader step and the dummy
                                    // 
                                    TransHopMeta mergeHop = new TransHopMeta(readerStep, dummyStep);
                                    if (masterTransMeta.findTransHop(mergeHop) == null) {
                                        masterTransMeta.addTransHop(mergeHop);
                                    }
                                }
                            }
                        }
                    } else {
                        if (!infoStep.isClustered()) {
                            // 
                            for (int s = 0; s < slaveServers.size(); s++) {
                                SlaveServer targetSlaveServer = slaveServers.get(s);
                                if (!targetSlaveServer.isMaster()) {
                                    // MASTER
                                    SocketWriterMeta socketWriterMeta = new SocketWriterMeta();
                                    socketWriterMeta.setPort("" + getPort(clusterSchema, masterSlaveServer, infoStep.getName(), 0, targetSlaveServer, originalStep.getName(), 0));
                                    socketWriterMeta.setBufferSize(clusterSchema.getSocketsBufferSize());
                                    socketWriterMeta.setFlushInterval(clusterSchema.getSocketsFlushInterval());
                                    socketWriterMeta.setCompressed(clusterSchema.isSocketsCompressed());
                                    StepMeta writerStep = new StepMeta(getWriterName(clusterSchema, masterSlaveServer, infoStep.getName(), 0, targetSlaveServer, originalStep.getName(), 0), socketWriterMeta);
                                    writerStep.setLocation(originalStep.getLocation().x, originalStep.getLocation().y + (s * FANOUT * 2) - (nrSlavesNodes * FANOUT / 2));
                                    writerStep.setDraw(originalStep.isDrawn());
                                    masterTransMeta.addStep(writerStep);
                                    // The previous step: add a hop to it.
                                    // It still has the original name as it is not clustered.
                                    // 
                                    StepMeta previous = masterTransMeta.findStep(infoStep.getName());
                                    if (previous == null) {
                                        previous = (StepMeta) infoStep.clone();
                                        masterTransMeta.addStep(previous);
                                    }
                                    TransHopMeta masterHop = new TransHopMeta(previous, writerStep);
                                    masterTransMeta.addTransHop(masterHop);
                                    // SLAVE
                                    TransMeta slave = getSlaveTransformation(clusterSchema, targetSlaveServer);
                                    SocketReaderMeta socketReaderMeta = new SocketReaderMeta();
                                    socketReaderMeta.setHostname(masterSlaveServer.getHostname());
                                    socketReaderMeta.setPort("" + getPort(clusterSchema, masterSlaveServer, infoStep.getName(), 0, targetSlaveServer, originalStep.getName(), 0));
                                    socketReaderMeta.setBufferSize(clusterSchema.getSocketsBufferSize());
                                    socketReaderMeta.setCompressed(clusterSchema.isSocketsCompressed());
                                    StepMeta readerStep = new StepMeta(getReaderName(clusterSchema, masterSlaveServer, infoStep.getName(), 0, targetSlaveServer, originalStep.getName(), 0), socketReaderMeta);
                                    readerStep.setLocation(originalStep.getLocation().x - (SPLIT / 2), originalStep.getLocation().y);
                                    readerStep.setDraw(originalStep.isDrawn());
                                    slave.addStep(readerStep);
                                    // also add the step itself.
                                    StepMeta slaveStep = slave.findStep(originalStep.getName());
                                    if (slaveStep == null) {
                                        slaveStep = addSlaveCopy(slave, originalStep, targetSlaveServer);
                                    }
                                    // And a hop from the
                                    TransHopMeta slaveHop = new TransHopMeta(readerStep, slaveStep);
                                    slave.addTransHop(slaveHop);
                                    // 
                                    // Now we have to explain to the slaveStep that it has to source from previous
                                    // 
                                    String[] infoStepNames = slaveStep.getStepMetaInterface().getStepIOMeta().getInfoStepnames();
                                    if (infoStepNames != null) {
                                        StepMeta[] is = new StepMeta[infoStepNames.length];
                                        for (int n = 0; n < infoStepNames.length; n++) {
                                            // OK, info steps moved to the slave steps
                                            is[n] = slave.findStep(infoStepNames[n]);
                                            if (infoStepNames[n].equals(infoStep.getName())) {
                                                // We want to replace this one with the reader step: that's where we source from now
                                                infoSteps[n] = readerStep;
                                            }
                                        }
                                        slaveStep.getStepMetaInterface().getStepIOMeta().setInfoSteps(infoSteps);
                                    }
                                }
                            }
                        } else {
                            // 
                            for (int s = 0; s < slaveServers.size(); s++) {
                                SlaveServer slaveServer = slaveServers.get(s);
                                if (!slaveServer.isMaster()) {
                                    TransMeta slave = getSlaveTransformation(clusterSchema, slaveServer);
                                    StepMeta slaveStep = slave.findStep(originalStep.getName());
                                    String[] infoStepNames = slaveStep.getStepMetaInterface().getStepIOMeta().getInfoStepnames();
                                    if (infoStepNames != null) {
                                        StepMeta[] is = new StepMeta[infoStepNames.length];
                                        for (int n = 0; n < infoStepNames.length; n++) {
                                            // OK, info steps moved to the slave steps
                                            is[n] = slave.findStep(infoStepNames[n]);
                                            // Hang on... is there a hop to the previous step?
                                            if (slave.findTransHop(is[n], slaveStep) == null) {
                                                TransHopMeta infoHop = new TransHopMeta(is[n], slaveStep);
                                                slave.addTransHop(infoHop);
                                            }
                                        }
                                        slaveStep.getStepMetaInterface().getStepIOMeta().setInfoSteps(infoSteps);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        // Also add the original list of partition schemas to the slave step copy partition distribution...
        // 
        slaveStepCopyPartitionDistribution.setOriginalPartitionSchemas(originalTransformation.getPartitionSchemas());
        // 
        for (TransMeta transMeta : slaveTransMap.values()) {
            transMeta.setSlaveStepCopyPartitionDistribution(slaveStepCopyPartitionDistribution);
            if (encrypt) {
                transMeta.setKey(pubK.getEncoded());
                transMeta.setPrivateKey(false);
            }
            transMeta.clearChanged();
        }
        // do not erase partitioning schema for master transformation
        // if some of steps is expected to run on master partitioned, that is the case
        // when partition schema should exists as 'local' partition schema instead of slave's remote one
        // see PDI-12766
        masterTransMeta.setPartitionSchemas(originalTransformation.getPartitionSchemas());
        masterTransMeta.setSlaveStepCopyPartitionDistribution(slaveStepCopyPartitionDistribution);
        if (encrypt) {
            masterTransMeta.setKey(pubK.getEncoded());
            masterTransMeta.setPrivateKey(!false);
        }
        masterTransMeta.clearChanged();
    // We're absolutely done here...
    } catch (Exception e) {
        throw new KettleException("Unexpected problem while generating master transformation", e);
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) PrivateKey(java.security.PrivateKey) SocketWriterMeta(org.pentaho.di.trans.steps.socketwriter.SocketWriterMeta) TransMeta(org.pentaho.di.trans.TransMeta) DummyTransMeta(org.pentaho.di.trans.steps.dummytrans.DummyTransMeta) IllegalBlockSizeException(javax.crypto.IllegalBlockSizeException) SlaveServer(org.pentaho.di.cluster.SlaveServer) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) DummyTransMeta(org.pentaho.di.trans.steps.dummytrans.DummyTransMeta) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) KeyPair(java.security.KeyPair) SocketReaderMeta(org.pentaho.di.trans.steps.socketreader.SocketReaderMeta) PartitionSchema(org.pentaho.di.partition.PartitionSchema) PublicKey(java.security.PublicKey) InvalidKeyException(java.security.InvalidKeyException) StepMeta(org.pentaho.di.trans.step.StepMeta) KettleException(org.pentaho.di.core.exception.KettleException) IllegalBlockSizeException(javax.crypto.IllegalBlockSizeException) InvalidKeyException(java.security.InvalidKeyException) LinkedList(java.util.LinkedList) RemoteStep(org.pentaho.di.trans.step.RemoteStep) TransHopMeta(org.pentaho.di.trans.TransHopMeta) HashMap(java.util.HashMap) Map(java.util.Map) ClusterSchema(org.pentaho.di.cluster.ClusterSchema) PublicKey(java.security.PublicKey) Key(java.security.Key) PrivateKey(java.security.PrivateKey)

Aggregations

InvalidKeyException (java.security.InvalidKeyException)1 Key (java.security.Key)1 KeyPair (java.security.KeyPair)1 PrivateKey (java.security.PrivateKey)1 PublicKey (java.security.PublicKey)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Map (java.util.Map)1 IllegalBlockSizeException (javax.crypto.IllegalBlockSizeException)1 ClusterSchema (org.pentaho.di.cluster.ClusterSchema)1 SlaveServer (org.pentaho.di.cluster.SlaveServer)1 KettleException (org.pentaho.di.core.exception.KettleException)1 PartitionSchema (org.pentaho.di.partition.PartitionSchema)1 TransHopMeta (org.pentaho.di.trans.TransHopMeta)1 TransMeta (org.pentaho.di.trans.TransMeta)1 RemoteStep (org.pentaho.di.trans.step.RemoteStep)1 StepMeta (org.pentaho.di.trans.step.StepMeta)1 StepPartitioningMeta (org.pentaho.di.trans.step.StepPartitioningMeta)1