Search in sources:

Example 1 with StepPartitioningMeta

use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.

In the class TransMeta, the method loadXML.

/**
 * Parses an XML DOM (starting at the specified Node) that describes the transformation.
 *
 * @param transnode
 *          The XML node to load from
 * @param fname
 *          The filename
 * @param metaStore
 *          the metadata store; remembered as this TransMeta's primary meta store and
 *          passed on to the steps being loaded
 * @param rep
 *          The repository to load the default list of database connections from (null if no repository is available)
 * @param setInternalVariables
 *          true if you want to set the internal variables based on this transformation information
 * @param parentVariableSpace
 *          the parent variable space to use during TransMeta construction
 * @param prompter
 *          the changed/replace listener or null if there is none
 * @throws KettleXMLException
 *           if any errors occur during parsing of the specified file
 * @throws KettleMissingPluginsException
 *           in case missing plugins were found (details are in the exception in that case)
 */
public void loadXML(Node transnode, String fname, IMetaStore metaStore, Repository rep, boolean setInternalVariables, VariableSpace parentVariableSpace, OverwritePrompter prompter) throws KettleXMLException, KettleMissingPluginsException {
    // Accumulates details of step plugins found missing while loading; thrown at the end
    // (see the outer catch/finally) whenever its detail list is non-empty.
    // NOTE(review): nothing in this method visibly adds details to it — presumably it is
    // populated during step loading (StepMeta construction); verify before relying on that.
    KettleMissingPluginsException missingPluginsException = new KettleMissingPluginsException(BaseMessages.getString(PKG, "TransMeta.MissingPluginsFoundWhileLoadingTransformation.Exception"));
    // Remember this as the primary meta store.
    this.metaStore = metaStore;
    try {
        // Props (user preferences) drive the shouldOverwrite() prompts below; they are
        // only available when the environment has initialized them.
        Props props = null;
        if (Props.isInitialized()) {
            props = Props.getInstance();
        }
        initializeVariablesFrom(parentVariableSpace);
        try {
            // Clear the transformation
            clear();
            // Set the filename here so it can be used in variables for ALL aspects of the transformation FIX: PDI-8890
            if (null == rep) {
                setFilename(fname);
            } else {
                // Set the repository here so it can be used in variables for ALL aspects of the job FIX: PDI-16441
                setRepository(rep);
            }
            // Read the shared objects: from the repository when attached, otherwise from
            // the shared objects file. Failures here are logged but NOT fatal — loading continues.
            try {
                sharedObjectsFile = XMLHandler.getTagValue(transnode, "info", "shared_objects_file");
                sharedObjects = rep != null ? rep.readTransSharedObjects(this) : readSharedObjects();
            } catch (Exception e) {
                log.logError(BaseMessages.getString(PKG, "TransMeta.ErrorReadingSharedObjects.Message", e.toString()));
                log.logError(Const.getStackTracker(e));
            }
            // Load the database connections, slave servers, cluster schemas & partition schemas into this object.
            // 
            importFromMetaStore();
            // Handle connections
            int n = XMLHandler.countNodes(transnode, DatabaseMeta.XML_TAG);
            Set<String> privateTransformationDatabases = new HashSet<>(n);
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveConnections", String.valueOf(n)));
            }
            for (int i = 0; i < n; i++) {
                if (log.isDebug()) {
                    log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtConnection") + i);
                }
                Node nodecon = XMLHandler.getSubNodeByNr(transnode, DatabaseMeta.XML_TAG, i);
                DatabaseMeta dbcon = new DatabaseMeta(nodecon);
                dbcon.shareVariablesWith(this);
                // Non-shared connections are remembered as private to this transformation.
                if (!dbcon.isShared()) {
                    privateTransformationDatabases.add(dbcon.getName());
                }
                DatabaseMeta exist = findDatabase(dbcon.getName());
                if (exist == null) {
                    addDatabase(dbcon);
                } else {
                    if (!exist.isShared()) {
                        // otherwise, we just keep the shared connection.
                        // Replace the existing (non-shared) connection only if the user agrees.
                        if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnectionYN", dbcon.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
                            int idx = indexOfDatabase(exist);
                            removeDatabase(idx);
                            addDatabase(idx, dbcon);
                        }
                    }
                }
            }
            setPrivateDatabases(privateTransformationDatabases);
            // Read the notes...
            Node notepadsnode = XMLHandler.getSubNode(transnode, XML_TAG_NOTEPADS);
            int nrnotes = XMLHandler.countNodes(notepadsnode, NotePadMeta.XML_TAG);
            for (int i = 0; i < nrnotes; i++) {
                Node notepadnode = XMLHandler.getSubNodeByNr(notepadsnode, NotePadMeta.XML_TAG, i);
                NotePadMeta ni = new NotePadMeta(notepadnode);
                notes.add(ni);
            }
            // Handle Steps
            int s = XMLHandler.countNodes(transnode, StepMeta.XML_TAG);
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.ReadingSteps") + s + " steps...");
            }
            for (int i = 0; i < s; i++) {
                Node stepnode = XMLHandler.getSubNodeByNr(transnode, StepMeta.XML_TAG, i);
                if (log.isDebug()) {
                    log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtStep") + i);
                }
                StepMeta stepMeta = new StepMeta(stepnode, databases, metaStore);
                // for tracing, retain hierarchy
                stepMeta.setParentTransMeta(this);
                // A "missing" step stands in for a step whose plugin could not be found.
                if (stepMeta.isMissing()) {
                    addMissingTrans((MissingTrans) stepMeta.getStepMetaInterface());
                }
                // Check if the step exists and if it's a shared step.
                // If so, then we will keep the shared version, not this one.
                // The stored XML is only for backup purposes.
                // 
                StepMeta check = findStep(stepMeta.getName());
                if (check != null) {
                    if (!check.isShared()) {
                        // Don't overwrite shared objects
                        addOrReplaceStep(stepMeta);
                    } else {
                        // Just keep the drawn flag and location
                        check.setDraw(stepMeta.isDrawn());
                        check.setLocation(stepMeta.getLocation());
                    }
                } else {
                    // simply add it.
                    addStep(stepMeta);
                }
            }
            // Read the error handling code of the steps...
            // 
            Node errorHandlingNode = XMLHandler.getSubNode(transnode, XML_TAG_STEP_ERROR_HANDLING);
            int nrErrorHandlers = XMLHandler.countNodes(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG);
            for (int i = 0; i < nrErrorHandlers; i++) {
                Node stepErrorMetaNode = XMLHandler.getSubNodeByNr(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG, i);
                StepErrorMeta stepErrorMeta = new StepErrorMeta(this, stepErrorMetaNode, steps);
                if (stepErrorMeta.getSourceStep() != null) {
                    // a bit of a trick, I know.
                    stepErrorMeta.getSourceStep().setStepErrorMeta(stepErrorMeta);
                }
            }
            // Let every step resolve its info/target step references now that all steps exist.
            for (int i = 0; i < nrSteps(); i++) {
                StepMeta stepMeta = getStep(i);
                StepMetaInterface sii = stepMeta.getStepMetaInterface();
                if (sii != null) {
                    sii.searchInfoAndTargetSteps(steps);
                }
            }
            // Handle Hops
            // 
            Node ordernode = XMLHandler.getSubNode(transnode, XML_TAG_ORDER);
            n = XMLHandler.countNodes(ordernode, TransHopMeta.XML_HOP_TAG);
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveHops") + n + " hops...");
            }
            for (int i = 0; i < n; i++) {
                if (log.isDebug()) {
                    log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtHop") + i);
                }
                Node hopnode = XMLHandler.getSubNodeByNr(ordernode, TransHopMeta.XML_HOP_TAG, i);
                TransHopMeta hopinf = new TransHopMeta(hopnode, steps);
                hopinf.setErrorHop(isErrorNode(errorHandlingNode, hopnode));
                addTransHop(hopinf);
            }
            // 
            // get transformation info:
            // 
            Node infonode = XMLHandler.getSubNode(transnode, XML_TAG_INFO);
            // Name
            // 
            setName(XMLHandler.getTagValue(infonode, "name"));
            // description
            // 
            description = XMLHandler.getTagValue(infonode, "description");
            // extended description
            // 
            extendedDescription = XMLHandler.getTagValue(infonode, "extended_description");
            // trans version
            // 
            trans_version = XMLHandler.getTagValue(infonode, "trans_version");
            // trans status
            // 
            trans_status = Const.toInt(XMLHandler.getTagValue(infonode, "trans_status"), -1);
            String transTypeCode = XMLHandler.getTagValue(infonode, "trans_type");
            transformationType = TransformationType.getTransformationTypeByCode(transTypeCode);
            // Resolve the repository directory for this transformation, when attached to a repository.
            if (rep != null) {
                String directoryPath = XMLHandler.getTagValue(infonode, "directory");
                if (directoryPath != null) {
                    directory = rep.findDirectory(directoryPath);
                    if (directory == null) {
                        // not found
                        // The root as default
                        directory = new RepositoryDirectory();
                    }
                }
            }
            // Read logging table information
            // 
            Node logNode = XMLHandler.getSubNode(infonode, "log");
            if (logNode != null) {
                // Backward compatibility...
                // 
                Node transLogNode = XMLHandler.getSubNode(logNode, TransLogTable.XML_TAG);
                if (transLogNode == null) {
                    // No new-style trans-log-table node: read the legacy per-tag logging
                    // settings straight from the <log> element instead.
                    transLogTable.findField(TransLogTable.ID.LINES_READ).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "read")));
                    transLogTable.findField(TransLogTable.ID.LINES_WRITTEN).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "write")));
                    transLogTable.findField(TransLogTable.ID.LINES_INPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "input")));
                    transLogTable.findField(TransLogTable.ID.LINES_OUTPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "output")));
                    transLogTable.findField(TransLogTable.ID.LINES_UPDATED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "update")));
                    transLogTable.findField(TransLogTable.ID.LINES_REJECTED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "rejected")));
                    transLogTable.setConnectionName(XMLHandler.getTagValue(infonode, "log", "connection"));
                    transLogTable.setSchemaName(XMLHandler.getTagValue(infonode, "log", "schema"));
                    transLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "table"));
                    transLogTable.findField(TransLogTable.ID.ID_BATCH).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "use_batchid")));
                    // NOTE(review): tag "USE_LOGFIELD" is upper-case unlike the other log tags —
                    // presumably historical; confirm the legacy XML really uses this exact casing.
                    transLogTable.findField(TransLogTable.ID.LOG_FIELD).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "USE_LOGFIELD")));
                    transLogTable.setLogSizeLimit(XMLHandler.getTagValue(infonode, "log", "size_limit_lines"));
                    transLogTable.setLogInterval(XMLHandler.getTagValue(infonode, "log", "interval"));
                    transLogTable.findField(TransLogTable.ID.CHANNEL_ID).setEnabled(false);
                    transLogTable.findField(TransLogTable.ID.LINES_REJECTED).setEnabled(false);
                    performanceLogTable.setConnectionName(transLogTable.getConnectionName());
                    performanceLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "step_performance_table"));
                } else {
                    transLogTable.loadXML(transLogNode, databases, steps);
                }
                Node perfLogNode = XMLHandler.getSubNode(logNode, PerformanceLogTable.XML_TAG);
                if (perfLogNode != null) {
                    performanceLogTable.loadXML(perfLogNode, databases, steps);
                }
                Node channelLogNode = XMLHandler.getSubNode(logNode, ChannelLogTable.XML_TAG);
                if (channelLogNode != null) {
                    channelLogTable.loadXML(channelLogNode, databases, steps);
                }
                Node stepLogNode = XMLHandler.getSubNode(logNode, StepLogTable.XML_TAG);
                if (stepLogNode != null) {
                    stepLogTable.loadXML(stepLogNode, databases, steps);
                }
                Node metricsLogNode = XMLHandler.getSubNode(logNode, MetricsLogTable.XML_TAG);
                if (metricsLogNode != null) {
                    metricsLogTable.loadXML(metricsLogNode, databases, steps);
                }
            }
            // Maxdate range options...
            String maxdatcon = XMLHandler.getTagValue(infonode, "maxdate", "connection");
            maxDateConnection = findDatabase(maxdatcon);
            maxDateTable = XMLHandler.getTagValue(infonode, "maxdate", "table");
            maxDateField = XMLHandler.getTagValue(infonode, "maxdate", "field");
            String offset = XMLHandler.getTagValue(infonode, "maxdate", "offset");
            maxDateOffset = Const.toDouble(offset, 0.0);
            String mdiff = XMLHandler.getTagValue(infonode, "maxdate", "maxdiff");
            maxDateDifference = Const.toDouble(mdiff, 0.0);
            // Check the dependencies as far as dates are concerned...
            // We calculate BEFORE we run the MAX of these dates
            // If the date is larger then enddate, startdate is set to MIN_DATE
            // 
            Node depsNode = XMLHandler.getSubNode(infonode, XML_TAG_DEPENDENCIES);
            int nrDeps = XMLHandler.countNodes(depsNode, TransDependency.XML_TAG);
            for (int i = 0; i < nrDeps; i++) {
                Node depNode = XMLHandler.getSubNodeByNr(depsNode, TransDependency.XML_TAG, i);
                TransDependency transDependency = new TransDependency(depNode, databases);
                // Only keep dependencies that are fully specified (database + field name).
                if (transDependency.getDatabase() != null && transDependency.getFieldname() != null) {
                    addDependency(transDependency);
                }
            }
            // Read the named parameters.
            Node paramsNode = XMLHandler.getSubNode(infonode, XML_TAG_PARAMETERS);
            int nrParams = XMLHandler.countNodes(paramsNode, "parameter");
            for (int i = 0; i < nrParams; i++) {
                Node paramNode = XMLHandler.getSubNodeByNr(paramsNode, "parameter", i);
                String paramName = XMLHandler.getTagValue(paramNode, "name");
                String defaultValue = XMLHandler.getTagValue(paramNode, "default_value");
                String descr = XMLHandler.getTagValue(paramNode, "description");
                addParameterDefinition(paramName, defaultValue, descr);
            }
            // Read the partitioning schemas
            // 
            Node partSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_PARTITIONSCHEMAS);
            int nrPartSchemas = XMLHandler.countNodes(partSchemasNode, PartitionSchema.XML_TAG);
            for (int i = 0; i < nrPartSchemas; i++) {
                Node partSchemaNode = XMLHandler.getSubNodeByNr(partSchemasNode, PartitionSchema.XML_TAG, i);
                PartitionSchema partitionSchema = new PartitionSchema(partSchemaNode);
                // Check if the step exists and if it's a shared step.
                // If so, then we will keep the shared version, not this one.
                // The stored XML is only for backup purposes.
                // 
                PartitionSchema check = findPartitionSchema(partitionSchema.getName());
                if (check != null) {
                    if (!check.isShared()) {
                        // we don't overwrite shared objects.
                        if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwritePartitionSchemaYN", partitionSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
                            addOrReplacePartitionSchema(partitionSchema);
                        }
                    }
                } else {
                    partitionSchemas.add(partitionSchema);
                }
            }
            // Resolve each step's partitioning metadata (both the step's own and its
            // target's) against the partition schemas loaded above.
            for (int i = 0; i < nrSteps(); i++) {
                StepPartitioningMeta stepPartitioningMeta = getStep(i).getStepPartitioningMeta();
                if (stepPartitioningMeta != null) {
                    stepPartitioningMeta.setPartitionSchemaAfterLoading(partitionSchemas);
                }
                StepPartitioningMeta targetStepPartitioningMeta = getStep(i).getTargetStepPartitioningMeta();
                if (targetStepPartitioningMeta != null) {
                    targetStepPartitioningMeta.setPartitionSchemaAfterLoading(partitionSchemas);
                }
            }
            // Read the slave servers...
            // 
            Node slaveServersNode = XMLHandler.getSubNode(infonode, XML_TAG_SLAVESERVERS);
            int nrSlaveServers = XMLHandler.countNodes(slaveServersNode, SlaveServer.XML_TAG);
            for (int i = 0; i < nrSlaveServers; i++) {
                Node slaveServerNode = XMLHandler.getSubNodeByNr(slaveServersNode, SlaveServer.XML_TAG, i);
                SlaveServer slaveServer = new SlaveServer(slaveServerNode);
                if (slaveServer.getName() == null) {
                    // NOTE(review): getName() is null here, so the logged message will
                    // render the name as "null" — the warning carries no identifier.
                    log.logError(BaseMessages.getString(PKG, "TransMeta.Log.WarningWhileCreationSlaveServer", slaveServer.getName()));
                    continue;
                }
                slaveServer.shareVariablesWith(this);
                // Check if the object exists and if it's a shared object.
                // If so, then we will keep the shared version, not this one.
                // The stored XML is only for backup purposes.
                SlaveServer check = findSlaveServer(slaveServer.getName());
                if (check != null) {
                    if (!check.isShared()) {
                        // we don't overwrite shared objects.
                        if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteSlaveServerYN", slaveServer.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
                            addOrReplaceSlaveServer(slaveServer);
                        }
                    }
                } else {
                    slaveServers.add(slaveServer);
                }
            }
            // Read the cluster schemas
            // 
            Node clusterSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_CLUSTERSCHEMAS);
            int nrClusterSchemas = XMLHandler.countNodes(clusterSchemasNode, ClusterSchema.XML_TAG);
            for (int i = 0; i < nrClusterSchemas; i++) {
                Node clusterSchemaNode = XMLHandler.getSubNodeByNr(clusterSchemasNode, ClusterSchema.XML_TAG, i);
                ClusterSchema clusterSchema = new ClusterSchema(clusterSchemaNode, slaveServers);
                clusterSchema.shareVariablesWith(this);
                // Check if the object exists and if it's a shared object.
                // If so, then we will keep the shared version, not this one.
                // The stored XML is only for backup purposes.
                ClusterSchema check = findClusterSchema(clusterSchema.getName());
                if (check != null) {
                    if (!check.isShared()) {
                        // we don't overwrite shared objects.
                        if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteClusterSchemaYN", clusterSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
                            addOrReplaceClusterSchema(clusterSchema);
                        }
                    }
                } else {
                    clusterSchemas.add(clusterSchema);
                }
            }
            // Resolve each step's cluster schema against the cluster schemas loaded above.
            for (int i = 0; i < nrSteps(); i++) {
                getStep(i).setClusterSchemaAfterLoading(clusterSchemas);
            }
            // Runtime tuning options, each falling back to a Const default when absent.
            String srowset = XMLHandler.getTagValue(infonode, "size_rowset");
            sizeRowset = Const.toInt(srowset, Const.ROWS_IN_ROWSET);
            sleepTimeEmpty = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_empty"), Const.TIMEOUT_GET_MILLIS);
            sleepTimeFull = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_full"), Const.TIMEOUT_PUT_MILLIS);
            usingUniqueConnections = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "unique_connections"));
            // feedback_shown / using_thread_priorities default to true: only an explicit "N" disables them.
            feedbackShown = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "feedback_shown"));
            feedbackSize = Const.toInt(XMLHandler.getTagValue(infonode, "feedback_size"), Const.ROWS_UPDATE);
            usingThreadPriorityManagment = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "using_thread_priorities"));
            // Performance monitoring for steps...
            // 
            capturingStepPerformanceSnapShots = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "capture_step_performance"));
            stepPerformanceCapturingDelay = Const.toLong(XMLHandler.getTagValue(infonode, "step_performance_capturing_delay"), 1000);
            stepPerformanceCapturingSizeLimit = XMLHandler.getTagValue(infonode, "step_performance_capturing_size_limit");
            // Created user/date
            createdUser = XMLHandler.getTagValue(infonode, "created_user");
            String createDate = XMLHandler.getTagValue(infonode, "created_date");
            if (createDate != null) {
                createdDate = XMLHandler.stringToDate(createDate);
            }
            // Changed user/date
            modifiedUser = XMLHandler.getTagValue(infonode, "modified_user");
            String modDate = XMLHandler.getTagValue(infonode, "modified_date");
            if (modDate != null) {
                modifiedDate = XMLHandler.stringToDate(modDate);
            }
            Node partitionDistNode = XMLHandler.getSubNode(transnode, SlaveStepCopyPartitionDistribution.XML_TAG);
            if (partitionDistNode != null) {
                slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution(partitionDistNode);
            } else {
                // leave empty
                slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution();
            }
            // Is this a slave transformation?
            // 
            slaveTransformation = "Y".equalsIgnoreCase(XMLHandler.getTagValue(transnode, "slave_transformation"));
            if (log.isDebug()) {
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfStepsReaded") + nrSteps());
                log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfHopsReaded") + nrTransHops());
            }
            sortSteps();
            // Load the attribute groups map
            // 
            attributesMap = AttributesUtil.loadAttributes(XMLHandler.getSubNode(transnode, AttributesUtil.XML_TAG));
            keyForSessionKey = XMLHandler.stringToBinary(XMLHandler.getTagValue(infonode, "key_for_session_key"));
            isKeyPrivate = "Y".equals(XMLHandler.getTagValue(infonode, "is_key_private"));
        } catch (KettleXMLException xe) {
            throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), xe);
        } catch (KettleException e) {
            throw new KettleXMLException(e);
        } finally {
            // Always runs, even on failure: detach from the parent variable space, then
            // (optionally) set the internal kettle variables and notify extension points.
            initializeVariablesFrom(null);
            if (setInternalVariables) {
                setInternalKettleVariables();
            }
            ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationMetaLoaded.id, this);
        }
    } catch (Exception e) {
        // Prefer reporting missing plugins over a generic parse failure.
        if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
            throw missingPluginsException;
        } else {
            throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), e);
        }
    } finally {
        // NOTE(review): throwing from a finally block discards any exception already in
        // flight. That appears deliberate here — whenever plugins are missing, the
        // missing-plugins exception wins, even if parsing otherwise succeeded — but verify.
        if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
            throw missingPluginsException;
        }
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RepositoryDirectory(org.pentaho.di.repository.RepositoryDirectory) KettleMissingPluginsException(org.pentaho.di.core.exception.KettleMissingPluginsException) PartitionSchema(org.pentaho.di.partition.PartitionSchema) Node(org.w3c.dom.Node) StepErrorMeta(org.pentaho.di.trans.step.StepErrorMeta) StepMetaInterface(org.pentaho.di.trans.step.StepMetaInterface) Props(org.pentaho.di.core.Props) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) SlaveServer(org.pentaho.di.cluster.SlaveServer) DatabaseMeta(org.pentaho.di.core.database.DatabaseMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) KettleXMLException(org.pentaho.di.core.exception.KettleXMLException) KettleRowException(org.pentaho.di.core.exception.KettleRowException) FileSystemException(org.apache.commons.vfs2.FileSystemException) KettleStepException(org.pentaho.di.core.exception.KettleStepException) IOException(java.io.IOException) KettleMissingPluginsException(org.pentaho.di.core.exception.KettleMissingPluginsException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) KettleException(org.pentaho.di.core.exception.KettleException) KettleDatabaseException(org.pentaho.di.core.exception.KettleDatabaseException) Point(org.pentaho.di.core.gui.Point) KettleExtensionPoint(org.pentaho.di.core.extension.KettleExtensionPoint) KettleXMLException(org.pentaho.di.core.exception.KettleXMLException) NotePadMeta(org.pentaho.di.core.NotePadMeta) ClusterSchema(org.pentaho.di.cluster.ClusterSchema) HashSet(java.util.HashSet)

Example 2 with StepPartitioningMeta

use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.

In the class Trans, the method prepareExecution.

/**
 * Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
 * and tracking the steps and hops in the transformation.
 *
 * @param arguments the arguments to use for this transformation
 * @throws KettleException in case the transformation could not be prepared (initialized)
 */
public void prepareExecution(String[] arguments) throws KettleException {
    // Phase 0: flag state and take timing snapshots for the metrics subsystem.
    setPreparing(true);
    startDate = null;
    setRunning(false);
    log.snap(Metrics.METRIC_TRANSFORMATION_EXECUTION_START);
    log.snap(Metrics.METRIC_TRANSFORMATION_INIT_START);
    // Give extension-point plugins a chance to interact before any allocation happens.
    ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationPrepareExecution.id, this);
    checkCompatibility();
    // Remember the caller-supplied arguments (null means: keep whatever was set before).
    if (arguments != null) {
        setArguments(arguments);
    }
    if (parentTrans != null) {
        IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentTrans, log));
    } else {
        // If there is no parent, one of these still needs to be called to instantiate a new cache
        IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentJob, log));
    }
    activateParameters();
    transMeta.activateParameters();
    ConnectionUtil.init(transMeta);
    // Log what we are about to run: by name if available, otherwise by filename.
    if (transMeta.getName() == null) {
        if (transMeta.getFilename() != null) {
            log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
        }
    } else {
        log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
    }
    if (getArguments() != null) {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
        }
    }
    if (isSafeModeEnabled()) {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
        }
    }
    if (getReplayDate() != null) {
        SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
        log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
    } else {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
        }
    }
    // Fall back to writing servlet output to stdout when no writer was injected,
    // honoring the KETTLE_DEFAULT_SERVLET_ENCODING system property when set.
    if (servletPrintWriter == null) {
        String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
        if (encoding == null) {
            servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
        } else {
            try {
                servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
            } catch (UnsupportedEncodingException ex) {
                // Unknown encoding name: fall back to the platform default.
                servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
            }
        }
    }
    // Keep track of all the row sets and allocated steps
    steps = new ArrayList<>();
    rowsets = new ArrayList<>();
    List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
    }
    // Phase 1: allocate a row set (buffer) for every hop, taking step copy
    // counts and re-partitioning into account.
    for (int i = 0; i < hopsteps.size(); i++) {
        StepMeta thisStep = hopsteps.get(i);
        if (thisStep.isMapping()) {
            // handled and allocated by the mapping step itself.
            continue;
        }
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
        }
        List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
        int nrTargets = nextSteps.size();
        for (int n = 0; n < nrTargets; n++) {
            // What's the next step?
            StepMeta nextStep = nextSteps.get(n);
            if (nextStep.isMapping()) {
                // handled and allocated by the mapping step itself.
                continue;
            }
            // How many times do we start the source step?
            int thisCopies = thisStep.getCopies();
            if (thisCopies < 0) {
                // A negative copy count means the "copies" setting could not be resolved.
                throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
            }
            // How many times do we start the target step?
            int nextCopies = nextStep.getCopies();
            // Are we re-partitioning?
            boolean repartitioning;
            if (thisStep.isPartitioned()) {
                repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
            } else {
                repartitioning = nextStep.isPartitioned();
            }
            int nrCopies;
            if (log.isDetailed()) {
                log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
            }
            // Classify the copy fan-out between source and target: 1->1, 1->N, N->1,
            // N->N (same count without repartitioning), or the general N->M case.
            int dispatchType;
            if (thisCopies == 1 && nextCopies == 1) {
                dispatchType = TYPE_DISP_1_1;
                nrCopies = 1;
            } else if (thisCopies == 1 && nextCopies > 1) {
                dispatchType = TYPE_DISP_1_N;
                nrCopies = nextCopies;
            } else if (thisCopies > 1 && nextCopies == 1) {
                dispatchType = TYPE_DISP_N_1;
                nrCopies = thisCopies;
            } else if (thisCopies == nextCopies && !repartitioning) {
                dispatchType = TYPE_DISP_N_N;
                nrCopies = nextCopies;
            } else {
                // > 1!
                dispatchType = TYPE_DISP_N_M;
                nrCopies = nextCopies;
            }
            // For every dispatch type except N->M one row set per copy suffices.
            if (dispatchType != TYPE_DISP_N_M) {
                for (int c = 0; c < nrCopies; c++) {
                    RowSet rowSet;
                    switch(transMeta.getTransformationType()) {
                        case Normal:
                            // This is a temporary patch until the batching rowset has proven
                            // to be working in all situations.
                            // Currently there are stalling problems when dealing with small
                            // amounts of rows.
                            Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
                            if (batchingRowSet != null && batchingRowSet.booleanValue()) {
                                rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
                            } else {
                                rowSet = new BlockingRowSet(transMeta.getSizeRowset());
                            }
                            break;
                        case SerialSingleThreaded:
                            rowSet = new SingleRowRowSet();
                            break;
                        case SingleThreaded:
                            rowSet = new QueueRowSet();
                            break;
                        default:
                            throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
                    }
                    // Name the row set after the (source copy) -> (target copy) pair it connects.
                    switch(dispatchType) {
                        case TYPE_DISP_1_1:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
                            break;
                        case TYPE_DISP_1_N:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
                            break;
                        case TYPE_DISP_N_1:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
                            break;
                        case TYPE_DISP_N_N:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
                            break;
                        default:
                            break;
                    }
                    rowsets.add(rowSet);
                    if (log.isDetailed()) {
                        log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
                    }
                }
            } else {
                // N->M: one row set per (source copy, target copy) pair for full
                // distribution...
                for (int s = 0; s < thisCopies; s++) {
                    for (int t = 0; t < nextCopies; t++) {
                        BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
                        rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
                        rowsets.add(rowSet);
                        if (log.isDetailed()) {
                            log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
                        }
                    }
                }
            }
        }
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
    }
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
    }
    // Phase 2: instantiate every step copy (StepMetaDataCombi bundles meta + data + step).
    for (int i = 0; i < hopsteps.size(); i++) {
        StepMeta stepMeta = hopsteps.get(i);
        String stepid = stepMeta.getStepID();
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
        }
        // How many copies are launched of this step?
        int nrCopies = stepMeta.getCopies();
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
        }
        // At least run once...
        for (int c = 0; c < nrCopies; c++) {
            // Make sure we haven't started it yet!
            if (!hasStepStarted(stepMeta.getName(), c)) {
                StepMetaDataCombi combi = new StepMetaDataCombi();
                combi.stepname = stepMeta.getName();
                combi.copy = c;
                // The meta-data
                combi.stepMeta = stepMeta;
                combi.meta = stepMeta.getStepMetaInterface();
                // Allocate the step data
                StepDataInterface data = combi.meta.getStepData();
                combi.data = data;
                // Allocate the step
                StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
                // Copy the variables of the transformation to the step...
                // don't share. Each copy of the step has its own variables.
                step.initializeVariablesFrom(this);
                step.setUsingThreadPriorityManagment(transMeta.isUsingThreadPriorityManagment());
                // Pass the connected repository & metaStore to the steps runtime
                step.setRepository(repository);
                step.setMetaStore(metaStore);
                // A partitioned step copy receives the partition ID that matches
                // its copy number.
                if (stepMeta.isPartitioned()) {
                    List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
                    if (partitionIDs != null && !partitionIDs.isEmpty()) {
                        // Pass the c-th partition ID to this step copy
                        step.setPartitionID(partitionIDs.get(c));
                    }
                }
                // Save the step too
                combi.step = step;
                // Propagate log level and metrics gathering to the step's own log channel.
                if (combi.step instanceof LoggingObjectInterface) {
                    LogChannelInterface logChannel = combi.step.getLogChannel();
                    logChannel.setLogLevel(logLevel);
                    logChannel.setGatheringMetrics(log.isGatheringMetrics());
                }
                // Add to the bunch...
                steps.add(combi);
                if (log.isDetailed()) {
                    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
                }
            }
        }
    }
    // Let steps with error handling locate their error-output row sets now that
    // all steps and row sets exist.
    for (int s = 0; s < steps.size(); s++) {
        StepMetaDataCombi combi = steps.get(s);
        if (combi.stepMeta.isDoingErrorHandling()) {
            combi.step.identifyErrorOutput();
        }
    }
    // Now (optionally) write start log record!
    // Make sure we synchronize appropriately to avoid duplicate batch IDs.
    Object syncObject = this;
    if (parentJob != null) {
        // parallel execution in a job
        syncObject = parentJob;
    }
    if (parentTrans != null) {
        // multiple sub-transformations
        syncObject = parentTrans;
    }
    synchronized (syncObject) {
        calculateBatchIdAndDateRange();
        beginProcessing();
    }
    // Phase 3: configure (re-)partitioning for every allocated step.
    for (int i = 0; i < steps.size(); i++) {
        StepMetaDataCombi sid = steps.get(i);
        StepMeta stepMeta = sid.stepMeta;
        StepInterface baseStep = sid.step;
        baseStep.setPartitioned(stepMeta.isPartitioned());
        // Now let's take a look at the source and target relation
        //
        // If this source step is not partitioned, and the target step is: it
        // means we need to re-partition the incoming data.
        // If both steps are partitioned on the same method and schema, we don't
        // need to re-partition
        // If both steps are partitioned on a different method or schema, we need
        // to re-partition as well.
        // If both steps are not partitioned, we don't need to re-partition
        boolean isThisPartitioned = stepMeta.isPartitioned();
        PartitionSchema thisPartitionSchema = null;
        if (isThisPartitioned) {
            thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
        }
        boolean isNextPartitioned = false;
        StepPartitioningMeta nextStepPartitioningMeta = null;
        PartitionSchema nextPartitionSchema = null;
        List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
        int nrNext = nextSteps.size();
        for (int p = 0; p < nrNext; p++) {
            StepMeta nextStep = nextSteps.get(p);
            if (nextStep.isPartitioned()) {
                isNextPartitioned = true;
                nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
                nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
            }
        }
        baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_NONE);
        // Re-partition when the target is partitioned and this step either isn't
        // partitioned at all or uses a different partition schema.
        if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
            baseStep.setRepartitioning(nextStepPartitioningMeta.getMethodType());
        }
        // For partitioning to a set of remove steps (repartitioning from a master
        // to a set or remote output steps)
        StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
        if (targetStepPartitioningMeta != null) {
            baseStep.setRepartitioning(targetStepPartitioningMeta.getMethodType());
        }
    }
    setPreparing(false);
    setInitializing(true);
    // Only do the topology sort for monitored transformations with a modest
    // number of steps.
    if (isMonitored() && steps.size() < 150) {
        doTopologySortOfSteps();
    }
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
    }
    StepInitThread[] initThreads = new StepInitThread[steps.size()];
    Thread[] threads = new Thread[steps.size()];
    // Phase 4: initialize all steps in parallel; ALL of them must finish before
    // we can continue properly.
    for (int i = 0; i < steps.size(); i++) {
        final StepMetaDataCombi sid = steps.get(i);
        // Do the init code in the background!
        initThreads[i] = new StepInitThread(sid, log);
        // Put it in a separate thread!
        threads[i] = new Thread(initThreads[i]);
        threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
        ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepBeforeInitialize.id, initThreads[i]);
        threads[i].start();
    }
    // Wait for every init thread to finish before inspecting results.
    for (int i = 0; i < threads.length; i++) {
        try {
            threads[i].join();
            ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepAfterInitialize.id, initThreads[i]);
        } catch (Exception ex) {
            log.logError("Error with init thread: " + ex.getMessage(), ex.getMessage());
            log.logError(Const.getStackTracker(ex));
        }
    }
    setInitializing(false);
    boolean ok = true;
    // Check the initialization result of every step copy.
    for (int i = 0; i < initThreads.length; i++) {
        StepMetaDataCombi combi = initThreads[i].getCombi();
        if (!initThreads[i].isOk()) {
            log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));
            combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
            ok = false;
        } else {
            combi.data.setStatus(StepExecutionStatus.STATUS_IDLE);
            if (log.isDetailed()) {
                log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
            }
        }
    }
    if (!ok) {
        // One or more steps failed on initialization.
        // Transformation is now stopped.
        setStopped(true);
        // Dispose every step, including the ones that initialized fine.
        for (int i = 0; i < initThreads.length; i++) {
            StepMetaDataCombi combi = initThreads[i].getCombi();
            // Dispose will overwrite the status, but we set it back right after
            // this.
            combi.step.dispose(combi.meta, combi.data);
            if (initThreads[i].isOk()) {
                combi.data.setStatus(StepExecutionStatus.STATUS_HALTED);
            } else {
                combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
            }
        }
        // Just for safety, fire the trans finished listeners...
        try {
            fireTransFinishedListeners();
        } catch (KettleException e) {
            // listeners produces errors
            log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
        // we will not pass this exception up to prepareExecution() entry point.
        } finally {
            // Flag the transformation as finished even if exception was thrown
            setFinished(true);
        }
        // In preview mode, include the captured log buffer in the exception text.
        if (preview) {
            String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
            throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
        } else {
            throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
        }
    }
    log.snap(Metrics.METRIC_TRANSFORMATION_INIT_STOP);
    KettleEnvironment.setExecutionInformation(this, repository);
    setReadyToStart(true);
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) QueueRowSet(org.pentaho.di.core.QueueRowSet) SingleRowRowSet(org.pentaho.di.core.SingleRowRowSet) RowSet(org.pentaho.di.core.RowSet) BlockingBatchingRowSet(org.pentaho.di.core.BlockingBatchingRowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepInterface(org.pentaho.di.trans.step.StepInterface) QueueRowSet(org.pentaho.di.core.QueueRowSet) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) StepDataInterface(org.pentaho.di.trans.step.StepDataInterface) BlockingBatchingRowSet(org.pentaho.di.core.BlockingBatchingRowSet) PrintWriter(java.io.PrintWriter) PartitionSchema(org.pentaho.di.partition.PartitionSchema) UnsupportedEncodingException(java.io.UnsupportedEncodingException) StepMeta(org.pentaho.di.trans.step.StepMeta) KettleExtensionPoint(org.pentaho.di.core.extension.KettleExtensionPoint) UnknownParamException(org.pentaho.di.core.parameters.UnknownParamException) KettleValueException(org.pentaho.di.core.exception.KettleValueException) KettleTransException(org.pentaho.di.core.exception.KettleTransException) DuplicateParamException(org.pentaho.di.core.parameters.DuplicateParamException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) KettleException(org.pentaho.di.core.exception.KettleException) KettleDatabaseException(org.pentaho.di.core.exception.KettleDatabaseException) RunThread(org.pentaho.di.trans.step.RunThread) StepInitThread(org.pentaho.di.trans.step.StepInitThread) SingleRowRowSet(org.pentaho.di.core.SingleRowRowSet) OutputStreamWriter(java.io.OutputStreamWriter) FileObject(org.apache.commons.vfs2.FileObject) LoggingObjectInterface(org.pentaho.di.core.logging.LoggingObjectInterface) 
SimpleDateFormat(java.text.SimpleDateFormat) LogChannelInterface(org.pentaho.di.core.logging.LogChannelInterface) HasLogChannelInterface(org.pentaho.di.core.logging.HasLogChannelInterface) StepInitThread(org.pentaho.di.trans.step.StepInitThread)

Example 3 with StepPartitioningMeta

use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.

the class TransSplitter method verifySlavePartitioningConfiguration.

private void verifySlavePartitioningConfiguration(TransMeta slave, StepMeta stepMeta, ClusterSchema clusterSchema, SlaveServer slaveServer) {
    // Look up (or lazily create) the per-slave map that records which steps
    // were already verified for this slave transformation.
    Map<StepMeta, String> processedSteps = slaveStepPartitionFlag.get(slave);
    if (processedSteps == null) {
        processedSteps = new Hashtable<StepMeta, String>();
        slaveStepPartitionFlag.put(slave, processedSteps);
    }
    if (processedSteps.get(stepMeta) != null) {
        // This step was handled before: nothing more to do.
        return;
    }
    StepPartitioningMeta partitioningMeta = stepMeta.getStepPartitioningMeta();
    boolean stepIsPartitioned = partitioningMeta != null && partitioningMeta.getMethodType() != StepPartitioningMeta.PARTITIONING_METHOD_NONE && partitioningMeta.getPartitionSchema() != null;
    if (stepIsPartitioned) {
        // Find the schema-to-partitions map assigned to this slave server.
        Map<PartitionSchema, List<String>> schemaPartitionsMap = slaveServerPartitionsMap.get(slaveServer);
        if (schemaPartitionsMap != null) {
            PartitionSchema originalSchema = partitioningMeta.getPartitionSchema();
            List<String> partitionsList = schemaPartitionsMap.get(originalSchema);
            if (partitionsList != null) {
                // A partition list exists: ensure the slave transformation has a
                // partition schema under the "(slave)" name carrying that list.
                String targetSchemaName = createSlavePartitionSchemaName(originalSchema.getName());
                if (slave.findPartitionSchema(targetSchemaName) == null) {
                    // add it to the slave if it doesn't exist yet.
                    slave.getPartitionSchemas().add(new PartitionSchema(targetSchemaName, partitionsList));
                }
            }
        }
    }
    // Mark the step as processed so the work is done only once per slave.
    processedSteps.put(stepMeta, "Y");
}
Also used : PartitionSchema(org.pentaho.di.partition.PartitionSchema) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepMeta(org.pentaho.di.trans.step.StepMeta)

Example 4 with StepPartitioningMeta

use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.

the class TransSplitter method generateSlavePartitionSchemas.

/**
 * We want to divide the available partitions over the slaves. Let's create a hashtable that contains the partition
 * schemas. Since we can only use a single cluster, we can divide them all over a single set of slave servers.
 *
 * @throws KettleException
 */
private void generateSlavePartitionSchemas() throws KettleException {
    slaveServerPartitionsMap = new Hashtable<SlaveServer, Map<PartitionSchema, List<String>>>();
    for (int i = 0; i < referenceSteps.length; i++) {
        StepMeta stepMeta = referenceSteps[i];
        StepPartitioningMeta stepPartitioningMeta = stepMeta.getStepPartitioningMeta();
        // Skip steps that are not partitioned at all.
        if (stepPartitioningMeta == null) {
            continue;
        }
        if (stepPartitioningMeta.getMethodType() == StepPartitioningMeta.PARTITIONING_METHOD_NONE) {
            continue;
        }
        // Skip steps that are not clustered: no slaves to divide partitions over.
        ClusterSchema clusterSchema = stepMeta.getClusterSchema();
        if (clusterSchema == null) {
            continue;
        }
        // Make a copy of the partition schema because we might change the object.
        // Let's not alter the original transformation.
        // The match is done on name, and the name is preserved in this case, so it should be safe to do so.
        // Also, all cloned steps re-match with the cloned schema name afterwards...
        PartitionSchema partitionSchema = (PartitionSchema) stepPartitioningMeta.getPartitionSchema().clone();
        int nrSlaves = clusterSchema.findNrSlaves();
        if (nrSlaves == 0) {
            // no slaves: ignore this situation too
            continue;
        }
        // Expand dynamically-defined partitions to match the slave count.
        if (partitionSchema.isDynamicallyDefined()) {
            partitionSchema.expandPartitionsDynamically(nrSlaves, originalTransformation);
        }
        int nrPartitions = partitionSchema.getPartitionIDs().size();
        if (nrPartitions < nrSlaves) {
            throw new KettleException("It doesn't make sense to have a partitioned, clustered step with less partitions (" + nrPartitions + ") than that there are slave servers (" + nrSlaves + ")");
        }
        int slaveServerNr = 0;
        List<SlaveServer> slaveServers = clusterSchema.getSlaveServers();
        // Distribute the partition IDs round-robin over the slave servers.
        for (int p = 0; p < nrPartitions; p++) {
            String partitionId = partitionSchema.getPartitionIDs().get(p);
            SlaveServer slaveServer = slaveServers.get(slaveServerNr);
            // Don't assign a partition to the master: step to the next slave
            // (wrapping around) when the current one is the master.
            if (slaveServer.isMaster()) {
                slaveServerNr++;
                if (slaveServerNr >= slaveServers.size()) {
                    // re-start
                    slaveServerNr = 0;
                }
                slaveServer = slaveServers.get(slaveServerNr);
            }
            Map<PartitionSchema, List<String>> schemaPartitionsMap = slaveServerPartitionsMap.get(slaveServer);
            if (schemaPartitionsMap == null) {
                // Add the schema-partitions map to the slave server
                schemaPartitionsMap = new HashMap<PartitionSchema, List<String>>();
                slaveServerPartitionsMap.put(slaveServer, schemaPartitionsMap);
            }
            // See if we find a list of partitions
            List<String> partitions = schemaPartitionsMap.get(partitionSchema);
            if (partitions == null) {
                partitions = new ArrayList<String>();
                schemaPartitionsMap.put(partitionSchema, partitions);
            }
            // Register each partition ID at most once per slave server.
            if (partitions.indexOf(partitionId) < 0) {
                partitions.add(partitionId);
            }
            // Switch to next slave.
            slaveServerNr++;
            if (slaveServerNr >= clusterSchema.getSlaveServers().size()) {
                // re-start
                slaveServerNr = 0;
            }
        }
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) PartitionSchema(org.pentaho.di.partition.PartitionSchema) SlaveServer(org.pentaho.di.cluster.SlaveServer) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepMeta(org.pentaho.di.trans.step.StepMeta) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ClusterSchema(org.pentaho.di.cluster.ClusterSchema)

Example 5 with StepPartitioningMeta

use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.

the class TransSplitter method addSlaveCopy.

/**
 * Create a copy of a step from the original transformation for use in a slave transformation. If the step is
 * partitioned, the partitioning will be changed to "schemaName (slave)"
 *
 * @param stepMeta
 *          The step to copy / clone.
 * @return a copy of the specified step for use in a slave transformation.
 */
private StepMeta addSlaveCopy(TransMeta transMeta, StepMeta stepMeta, SlaveServer slaveServer) {
    StepMeta slaveStep = (StepMeta) stepMeta.clone();
    if (slaveStep.isPartitioned()) {
        // Re-point the partitioning at the "schemaName (slave)" variant of the
        // schema when that variant exists in the target transformation.
        StepPartitioningMeta partitioning = slaveStep.getStepPartitioningMeta();
        String slaveSchemaName = createSlavePartitionSchemaName(partitioning.getPartitionSchema().getName());
        PartitionSchema slaveSchema = transMeta.findPartitionSchema(slaveSchemaName);
        if (slaveSchema != null) {
            partitioning.setPartitionSchema(slaveSchema);
        }
        // Always just start a single copy on the slave server...
        // Otherwise the confusion w.r.t. to partitioning & re-partitioning would be complete.
        slaveStep.setCopies(1);
    }
    transMeta.addStep(slaveStep);
    return slaveStep;
}
Also used : PartitionSchema(org.pentaho.di.partition.PartitionSchema) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepMeta(org.pentaho.di.trans.step.StepMeta)

Aggregations

StepPartitioningMeta (org.pentaho.di.trans.step.StepPartitioningMeta)25 PartitionSchema (org.pentaho.di.partition.PartitionSchema)18 StepMeta (org.pentaho.di.trans.step.StepMeta)16 TransMeta (org.pentaho.di.trans.TransMeta)9 KettleException (org.pentaho.di.core.exception.KettleException)8 Test (org.junit.Test)6 StepMetaInterface (org.pentaho.di.trans.step.StepMetaInterface)5 DummyTransMeta (org.pentaho.di.trans.steps.dummytrans.DummyTransMeta)5 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4 List (java.util.List)4 ClusterSchema (org.pentaho.di.cluster.ClusterSchema)4 NotePadMeta (org.pentaho.di.core.NotePadMeta)4 TransHopMeta (org.pentaho.di.trans.TransHopMeta)4 SlaveServer (org.pentaho.di.cluster.SlaveServer)3 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)3 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)3 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)3 StepErrorMeta (org.pentaho.di.trans.step.StepErrorMeta)3 HashMap (java.util.HashMap)2