Search in sources :

Example 1 with BlockingBatchingRowSet

use of org.pentaho.di.core.BlockingBatchingRowSet in project pentaho-kettle by pentaho.

the class Trans method prepareExecution.

/**
 * Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
 * and tracking the steps and hops in the transformation.
 *
 * @param arguments the arguments to use for this transformation
 * @throws KettleException in case the transformation could not be prepared (initialized)
 */
public void prepareExecution(String[] arguments) throws KettleException {
    setPreparing(true);
    startDate = null;
    setRunning(false);
    log.snap(Metrics.METRIC_TRANSFORMATION_EXECUTION_START);
    log.snap(Metrics.METRIC_TRANSFORMATION_INIT_START);
    ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationPrepareExecution.id, this);
    checkCompatibility();
    // 
    if (arguments != null) {
        setArguments(arguments);
    }
    if (parentTrans != null) {
        IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentTrans, log));
    } else {
        // If there is no parent, one of these still needs to be called to instantiate a new cache
        IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentJob, log));
    }
    activateParameters();
    transMeta.activateParameters();
    ConnectionUtil.init(transMeta);
    if (transMeta.getName() == null) {
        if (transMeta.getFilename() != null) {
            log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
        }
    } else {
        log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
    }
    if (getArguments() != null) {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
        }
    }
    if (isSafeModeEnabled()) {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
        }
    }
    if (getReplayDate() != null) {
        SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
        log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
    } else {
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
        }
    }
    // 
    if (servletPrintWriter == null) {
        String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
        if (encoding == null) {
            servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
        } else {
            try {
                servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
            } catch (UnsupportedEncodingException ex) {
                servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
            }
        }
    }
    // Keep track of all the row sets and allocated steps
    // 
    steps = new ArrayList<>();
    rowsets = new ArrayList<>();
    List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
    }
    // 
    for (int i = 0; i < hopsteps.size(); i++) {
        StepMeta thisStep = hopsteps.get(i);
        if (thisStep.isMapping()) {
            // handled and allocated by the mapping step itself.
            continue;
        }
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
        }
        List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
        int nrTargets = nextSteps.size();
        for (int n = 0; n < nrTargets; n++) {
            // What's the next step?
            StepMeta nextStep = nextSteps.get(n);
            if (nextStep.isMapping()) {
                // handled and allocated by the mapping step itself.
                continue;
            }
            // How many times do we start the source step?
            int thisCopies = thisStep.getCopies();
            if (thisCopies < 0) {
                // 
                throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
            }
            // How many times do we start the target step?
            int nextCopies = nextStep.getCopies();
            // Are we re-partitioning?
            boolean repartitioning;
            if (thisStep.isPartitioned()) {
                repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
            } else {
                repartitioning = nextStep.isPartitioned();
            }
            int nrCopies;
            if (log.isDetailed()) {
                log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
            }
            int dispatchType;
            if (thisCopies == 1 && nextCopies == 1) {
                dispatchType = TYPE_DISP_1_1;
                nrCopies = 1;
            } else if (thisCopies == 1 && nextCopies > 1) {
                dispatchType = TYPE_DISP_1_N;
                nrCopies = nextCopies;
            } else if (thisCopies > 1 && nextCopies == 1) {
                dispatchType = TYPE_DISP_N_1;
                nrCopies = thisCopies;
            } else if (thisCopies == nextCopies && !repartitioning) {
                dispatchType = TYPE_DISP_N_N;
                nrCopies = nextCopies;
            } else {
                // > 1!
                dispatchType = TYPE_DISP_N_M;
                nrCopies = nextCopies;
            }
            // 
            if (dispatchType != TYPE_DISP_N_M) {
                for (int c = 0; c < nrCopies; c++) {
                    RowSet rowSet;
                    switch(transMeta.getTransformationType()) {
                        case Normal:
                            // This is a temporary patch until the batching rowset has proven
                            // to be working in all situations.
                            // Currently there are stalling problems when dealing with small
                            // amounts of rows.
                            // 
                            Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
                            if (batchingRowSet != null && batchingRowSet.booleanValue()) {
                                rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
                            } else {
                                rowSet = new BlockingRowSet(transMeta.getSizeRowset());
                            }
                            break;
                        case SerialSingleThreaded:
                            rowSet = new SingleRowRowSet();
                            break;
                        case SingleThreaded:
                            rowSet = new QueueRowSet();
                            break;
                        default:
                            throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
                    }
                    switch(dispatchType) {
                        case TYPE_DISP_1_1:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
                            break;
                        case TYPE_DISP_1_N:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
                            break;
                        case TYPE_DISP_N_1:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
                            break;
                        case TYPE_DISP_N_N:
                            rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
                            break;
                        default:
                            break;
                    }
                    rowsets.add(rowSet);
                    if (log.isDetailed()) {
                        log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
                    }
                }
            } else {
                // distribution...
                for (int s = 0; s < thisCopies; s++) {
                    for (int t = 0; t < nextCopies; t++) {
                        BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
                        rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
                        rowsets.add(rowSet);
                        if (log.isDetailed()) {
                            log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
                        }
                    }
                }
            }
        }
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
    }
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
    }
    // 
    for (int i = 0; i < hopsteps.size(); i++) {
        StepMeta stepMeta = hopsteps.get(i);
        String stepid = stepMeta.getStepID();
        if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
        }
        // How many copies are launched of this step?
        int nrCopies = stepMeta.getCopies();
        if (log.isDebug()) {
            log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
        }
        // At least run once...
        for (int c = 0; c < nrCopies; c++) {
            // Make sure we haven't started it yet!
            if (!hasStepStarted(stepMeta.getName(), c)) {
                StepMetaDataCombi combi = new StepMetaDataCombi();
                combi.stepname = stepMeta.getName();
                combi.copy = c;
                // The meta-data
                combi.stepMeta = stepMeta;
                combi.meta = stepMeta.getStepMetaInterface();
                // Allocate the step data
                StepDataInterface data = combi.meta.getStepData();
                combi.data = data;
                // Allocate the step
                StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
                // Copy the variables of the transformation to the step...
                // don't share. Each copy of the step has its own variables.
                // 
                step.initializeVariablesFrom(this);
                step.setUsingThreadPriorityManagment(transMeta.isUsingThreadPriorityManagment());
                // Pass the connected repository & metaStore to the steps runtime
                // 
                step.setRepository(repository);
                step.setMetaStore(metaStore);
                // things as well...
                if (stepMeta.isPartitioned()) {
                    List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
                    if (partitionIDs != null && !partitionIDs.isEmpty()) {
                        // Pass the partition ID
                        step.setPartitionID(partitionIDs.get(c));
                    // to the step
                    }
                }
                // Save the step too
                combi.step = step;
                // /
                if (combi.step instanceof LoggingObjectInterface) {
                    LogChannelInterface logChannel = combi.step.getLogChannel();
                    logChannel.setLogLevel(logLevel);
                    logChannel.setGatheringMetrics(log.isGatheringMetrics());
                }
                // Add to the bunch...
                steps.add(combi);
                if (log.isDetailed()) {
                    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
                }
            }
        }
    }
    // 
    for (int s = 0; s < steps.size(); s++) {
        StepMetaDataCombi combi = steps.get(s);
        if (combi.stepMeta.isDoingErrorHandling()) {
            combi.step.identifyErrorOutput();
        }
    }
    // Now (optionally) write start log record!
    // Make sure we synchronize appropriately to avoid duplicate batch IDs.
    // 
    Object syncObject = this;
    if (parentJob != null) {
        // parallel execution in a job
        syncObject = parentJob;
    }
    if (parentTrans != null) {
        // multiple sub-transformations
        syncObject = parentTrans;
    }
    synchronized (syncObject) {
        calculateBatchIdAndDateRange();
        beginProcessing();
    }
    // 
    for (int i = 0; i < steps.size(); i++) {
        StepMetaDataCombi sid = steps.get(i);
        StepMeta stepMeta = sid.stepMeta;
        StepInterface baseStep = sid.step;
        baseStep.setPartitioned(stepMeta.isPartitioned());
        // Now let's take a look at the source and target relation
        // 
        // If this source step is not partitioned, and the target step is: it
        // means we need to re-partition the incoming data.
        // If both steps are partitioned on the same method and schema, we don't
        // need to re-partition
        // If both steps are partitioned on a different method or schema, we need
        // to re-partition as well.
        // If both steps are not partitioned, we don't need to re-partition
        // 
        boolean isThisPartitioned = stepMeta.isPartitioned();
        PartitionSchema thisPartitionSchema = null;
        if (isThisPartitioned) {
            thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
        }
        boolean isNextPartitioned = false;
        StepPartitioningMeta nextStepPartitioningMeta = null;
        PartitionSchema nextPartitionSchema = null;
        List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
        int nrNext = nextSteps.size();
        for (int p = 0; p < nrNext; p++) {
            StepMeta nextStep = nextSteps.get(p);
            if (nextStep.isPartitioned()) {
                isNextPartitioned = true;
                nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
                nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
            }
        }
        baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_NONE);
        // 
        if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
            baseStep.setRepartitioning(nextStepPartitioningMeta.getMethodType());
        }
        // For partitioning to a set of remove steps (repartitioning from a master
        // to a set or remote output steps)
        // 
        StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
        if (targetStepPartitioningMeta != null) {
            baseStep.setRepartitioning(targetStepPartitioningMeta.getMethodType());
        }
    }
    setPreparing(false);
    setInitializing(true);
    // 
    if (isMonitored() && steps.size() < 150) {
        doTopologySortOfSteps();
    }
    if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
    }
    StepInitThread[] initThreads = new StepInitThread[steps.size()];
    Thread[] threads = new Thread[steps.size()];
    // 
    for (int i = 0; i < steps.size(); i++) {
        final StepMetaDataCombi sid = steps.get(i);
        // Do the init code in the background!
        // Init all steps at once, but ALL steps need to finish before we can
        // continue properly!
        // 
        initThreads[i] = new StepInitThread(sid, log);
        // Put it in a separate thread!
        // 
        threads[i] = new Thread(initThreads[i]);
        threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
        ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepBeforeInitialize.id, initThreads[i]);
        threads[i].start();
    }
    for (int i = 0; i < threads.length; i++) {
        try {
            threads[i].join();
            ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepAfterInitialize.id, initThreads[i]);
        } catch (Exception ex) {
            log.logError("Error with init thread: " + ex.getMessage(), ex.getMessage());
            log.logError(Const.getStackTracker(ex));
        }
    }
    setInitializing(false);
    boolean ok = true;
    // 
    for (int i = 0; i < initThreads.length; i++) {
        StepMetaDataCombi combi = initThreads[i].getCombi();
        if (!initThreads[i].isOk()) {
            log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));
            combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
            ok = false;
        } else {
            combi.data.setStatus(StepExecutionStatus.STATUS_IDLE);
            if (log.isDetailed()) {
                log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
            }
        }
    }
    if (!ok) {
        // One or more steps failed on initialization.
        // Transformation is now stopped.
        setStopped(true);
        // 
        for (int i = 0; i < initThreads.length; i++) {
            StepMetaDataCombi combi = initThreads[i].getCombi();
            // Dispose will overwrite the status, but we set it back right after
            // this.
            combi.step.dispose(combi.meta, combi.data);
            if (initThreads[i].isOk()) {
                combi.data.setStatus(StepExecutionStatus.STATUS_HALTED);
            } else {
                combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
            }
        }
        // Just for safety, fire the trans finished listeners...
        try {
            fireTransFinishedListeners();
        } catch (KettleException e) {
            // listeners produces errors
            log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
        // we will not pass this exception up to prepareExecution() entry point.
        } finally {
            // Flag the transformation as finished even if exception was thrown
            setFinished(true);
        }
        // 
        if (preview) {
            String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
            throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
        } else {
            throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
        }
    }
    log.snap(Metrics.METRIC_TRANSFORMATION_INIT_STOP);
    KettleEnvironment.setExecutionInformation(this, repository);
    setReadyToStart(true);
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) QueueRowSet(org.pentaho.di.core.QueueRowSet) SingleRowRowSet(org.pentaho.di.core.SingleRowRowSet) RowSet(org.pentaho.di.core.RowSet) BlockingBatchingRowSet(org.pentaho.di.core.BlockingBatchingRowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) StepPartitioningMeta(org.pentaho.di.trans.step.StepPartitioningMeta) StepInterface(org.pentaho.di.trans.step.StepInterface) QueueRowSet(org.pentaho.di.core.QueueRowSet) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) StepDataInterface(org.pentaho.di.trans.step.StepDataInterface) BlockingBatchingRowSet(org.pentaho.di.core.BlockingBatchingRowSet) PrintWriter(java.io.PrintWriter) PartitionSchema(org.pentaho.di.partition.PartitionSchema) UnsupportedEncodingException(java.io.UnsupportedEncodingException) StepMeta(org.pentaho.di.trans.step.StepMeta) KettleExtensionPoint(org.pentaho.di.core.extension.KettleExtensionPoint) UnknownParamException(org.pentaho.di.core.parameters.UnknownParamException) KettleValueException(org.pentaho.di.core.exception.KettleValueException) KettleTransException(org.pentaho.di.core.exception.KettleTransException) DuplicateParamException(org.pentaho.di.core.parameters.DuplicateParamException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) KettleException(org.pentaho.di.core.exception.KettleException) KettleDatabaseException(org.pentaho.di.core.exception.KettleDatabaseException) RunThread(org.pentaho.di.trans.step.RunThread) StepInitThread(org.pentaho.di.trans.step.StepInitThread) SingleRowRowSet(org.pentaho.di.core.SingleRowRowSet) OutputStreamWriter(java.io.OutputStreamWriter) FileObject(org.apache.commons.vfs2.FileObject) LoggingObjectInterface(org.pentaho.di.core.logging.LoggingObjectInterface) SimpleDateFormat(java.text.SimpleDateFormat) LogChannelInterface(org.pentaho.di.core.logging.LogChannelInterface) HasLogChannelInterface(org.pentaho.di.core.logging.HasLogChannelInterface) StepInitThread(org.pentaho.di.trans.step.StepInitThread)

Aggregations

OutputStreamWriter (java.io.OutputStreamWriter)1 PrintWriter (java.io.PrintWriter)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 SimpleDateFormat (java.text.SimpleDateFormat)1 FileObject (org.apache.commons.vfs2.FileObject)1 BlockingBatchingRowSet (org.pentaho.di.core.BlockingBatchingRowSet)1 BlockingRowSet (org.pentaho.di.core.BlockingRowSet)1 QueueRowSet (org.pentaho.di.core.QueueRowSet)1 RowSet (org.pentaho.di.core.RowSet)1 SingleRowRowSet (org.pentaho.di.core.SingleRowRowSet)1 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)1 KettleException (org.pentaho.di.core.exception.KettleException)1 KettleFileException (org.pentaho.di.core.exception.KettleFileException)1 KettleTransException (org.pentaho.di.core.exception.KettleTransException)1 KettleValueException (org.pentaho.di.core.exception.KettleValueException)1 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)1 HasLogChannelInterface (org.pentaho.di.core.logging.HasLogChannelInterface)1 LogChannelInterface (org.pentaho.di.core.logging.LogChannelInterface)1 LoggingObjectInterface (org.pentaho.di.core.logging.LoggingObjectInterface)1 DuplicateParamException (org.pentaho.di.core.parameters.DuplicateParamException)1