Search in sources :

Example 31 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

From the class SingleThreadedTransExecutor, method sortSteps.

/**
 * Sort the steps from start to finish...
 * <p>
 * Reorders {@code steps} in place with a cocktail sort (bi-directional bubble sort) that uses
 * {@code TransMeta.findPrevious()} as its ordering test. A textual trace of the sort is
 * accumulated while sorting and written out when the log is at detailed level.
 */
private void sortSteps() {
    // The bubble sort algorithm in contrast to the QuickSort or MergeSort
    // algorithms
    // does indeed cover all possibilities.
    // Sorting larger transformations with hundreds of steps might be too slow
    // though.
    // We should consider caching TransMeta.findPrevious() results in that case.
    // 
    trans.getTransMeta().clearCaches();
    // 
    // Cocktail sort (bi-directional bubble sort)
    // 
    // Original sort was taking 3ms for 30 steps
    // cocktail sort takes about 8ms for the same 30, but it works :)
    // set these to true if you are working on this algorithm and don't like
    // flying blind.
    // 
    // log sort details
    boolean testing = true;
    // [stepsMinSize, stepsSize) is the window of list positions still being compared.
    int stepsMinSize = 0;
    int stepsSize = steps.size();
    // Noticed a problem with an immediate shrinking iteration window
    // trapping rows that need to be sorted.
    // This threshold buys us some time to get the sorting close before
    // starting to decrease the window size.
    // 
    // TODO: this could become much smarter by tracking row movement
    // and reacting to that each outer iteration verses
    // using a threshold.
    // 
    // After this many iterations enable trimming inner iteration
    // window on no change being detected.
    // 
    int windowShrinkThreshold = (int) Math.round(stepsSize * 0.75);
    // give ourselves some room to sort big lists. the window threshold should
    // stop us before reaching this anyway.
    // 
    int totalIterations = stepsSize * 2;
    int actualIterations = 0;
    boolean isBefore = false;
    boolean forwardChange = false;
    boolean backwardChange = false;
    boolean lastForwardChange = true;
    boolean keepSortingForward = true;
    StepMetaDataCombi one = null;
    StepMetaDataCombi two = null;
    // Collecting the trace in one builder helps group our output so other
    // threads don't get logs in our output.
    StringBuilder tLogString = new StringBuilder();
    tLogString.append("-------------------------------------------------------").append("\n");
    tLogString.append("--SingleThreadedTransExecutor.sortSteps(cocktail)").append("\n");
    tLogString.append("--Trans: ").append(trans.getName()).append("\n");
    tLogString.append("-").append("\n");
    long startTime = System.currentTimeMillis();
    for (int x = 0; x < totalIterations; x++) {
        // Forward pass: walk the window left to right, swapping neighbours
        // that findPrevious() reports as being in the wrong order.
        // 
        if (keepSortingForward) {
            for (int y = stepsMinSize; y < stepsSize - 1; y++) {
                one = steps.get(y);
                two = steps.get(y + 1);
                isBefore = trans.getTransMeta().findPrevious(one.stepMeta, two.stepMeta);
                if (isBefore) {
                    // two was found to be positioned BEFORE one so we need to
                    // switch them...
                    // 
                    steps.set(y, two);
                    steps.set(y + 1, one);
                    forwardChange = true;
                }
            }
        }
        // Backward pass: walk the window right to left.
        // 
        for (int z = stepsSize - 1; z > stepsMinSize; z--) {
            one = steps.get(z);
            two = steps.get(z - 1);
            isBefore = trans.getTransMeta().findPrevious(one.stepMeta, two.stepMeta);
            if (!isBefore) {
                // two was found NOT to be positioned BEFORE one so we need to
                // switch them...
                // 
                steps.set(z, two);
                steps.set(z - 1, one);
                backwardChange = true;
            }
        }
        // Past the threshold, a forward pass without swaps trims the upper end of the window.
        // 
        if (x > windowShrinkThreshold && !forwardChange) {
            // should we keep going? check the window size
            // 
            stepsSize--;
            if (stepsSize <= stepsMinSize) {
                if (testing) {
                    // Terminate this entry with a newline, like the matching branch below,
                    // so the two messages do not run together in the log.
                    tLogString.append(String.format("stepsMinSize:%s  stepsSize:%s", stepsMinSize, stepsSize)).append("\n");
                    tLogString.append("stepsSize is <= stepsMinSize.. exiting outer sort loop. index:" + x).append("\n");
                }
                break;
            }
        }
        // Past the threshold, a backward pass without swaps trims the lower end of the window.
        // 
        if (x > windowShrinkThreshold && !backwardChange) {
            // should we keep going? check the window size
            // 
            stepsMinSize++;
            if (stepsMinSize >= stepsSize) {
                if (testing) {
                    tLogString.append(String.format("stepsMinSize:%s  stepsSize:%s", stepsMinSize, stepsSize)).append("\n");
                    tLogString.append("stepsMinSize is >= stepsSize.. exiting outer sort loop. index:" + x).append("\n");
                }
                break;
            }
        }
        // End of both forward and backward traversal.
        // Time to see if we should keep going.
        // 
        actualIterations++;
        if (!forwardChange && !backwardChange) {
            if (testing) {
                tLogString.append(String.format("exiting outer loop because no change was detected going forward or backward. index:%s  min:%s  max:%s", x, stepsMinSize, stepsSize)).append("\n");
            }
            break;
        }
        // Two consecutive outer iterations without a forward swap: stop doing forward passes.
        // 
        if (keepSortingForward && x > 0 && !lastForwardChange && !forwardChange) {
            keepSortingForward = false;
        }
        lastForwardChange = forwardChange;
        forwardChange = false;
        backwardChange = false;
    }
    // finished sorting
    long endTime = System.currentTimeMillis();
    long totalTime = (endTime - startTime);
    tLogString.append("-------------------------------------------------------").append("\n");
    tLogString.append("Steps sort time: " + totalTime + "ms").append("\n");
    tLogString.append("Total iterations: " + actualIterations).append("\n");
    tLogString.append("Step count: " + steps.size()).append("\n");
    tLogString.append("Steps after sort: ").append("\n");
    for (StepMetaDataCombi combi : steps) {
        tLogString.append(combi.step.getStepname()).append("\n");
    }
    tLogString.append("-------------------------------------------------------").append("\n");
    if (log.isDetailed()) {
        log.logDetailed(tLogString.toString());
    }
}
Also used : StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi)

Example 32 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

From the class SingleThreadedTransExecutor, method init.

/**
 * Checks that every step supports the single threaded engine and then initializes each step.
 *
 * @return true when every step's init call succeeded, false as soon as one of them reports failure
 * @throws KettleException when a step does not list SingleThreaded among its supported transformation types
 */
public boolean init() throws KettleException {
    // Pass 1: reject any step that cannot run single threaded.
    for (StepMetaDataCombi candidate : steps) {
        boolean supported = false;
        for (TransformationType supportedType : candidate.stepMeta.getStepMetaInterface().getSupportedTransformationTypes()) {
            if (supportedType == TransformationType.SingleThreaded) {
                supported = true;
                break;
            }
        }
        if (supported) {
            continue;
        }
        throw new KettleException("Step '" + candidate.stepname + "' of type '" + candidate.stepMeta.getStepID() + "' is not yet supported in a Single Threaded transformation engine.");
    }
    // Pass 2: initialize the steps in list order, stopping at the first failure.
    for (StepMetaDataCombi candidate : steps) {
        if (!candidate.step.init(candidate.meta, candidate.data)) {
            return false;
        }
    }
    return true;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) TransformationType(org.pentaho.di.trans.TransMeta.TransformationType)

Example 33 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

From the class SingleThreadedTransExecutor, method oneIteration.

/**
 * Runs one pass over all steps that are not finished yet, giving each of them the chance to
 * process the rows currently available on its input.
 *
 * @return true if more iterations can be performed. False if this is not the case
 *         (including when a step reports errors or the transformation is stopped).
 * @throws KettleException declared for the step calls made here; this method does not throw it itself
 */
public boolean oneIteration() throws KettleException {
    for (int idx = 0; idx < steps.size() && !trans.isStopped(); idx++) {
        if (done[idx]) {
            // This step already finished in an earlier iteration.
            continue;
        }
        StepMetaDataCombi current = steps.get(idx);
        boolean finished = false;

        // Info row sets first: call processRow() at least once per info row set, then keep
        // calling while that row set still holds rows and the step has not reported completion.
        List<RowSet> infoSets = stepInfoRowSets.get(idx);
        for (RowSet infoSet : infoSets) {
            do {
                finished = !current.step.processRow(current.meta, current.data);
                if (current.step.getErrors() > 0) {
                    return false;
                }
            } while (infoSet.size() > 0 && !finished);
        }

        // Regular input handling.
        List<RowSet> inputs = current.step.getInputRowSets();
        if (inputs.size() != 0) {
            // We can't be sure which row set the step actually reads from, so count the rows over
            // all inputs and call processRow() that many times; the step finds the rows itself.
            int pending = 0;
            for (RowSet input : inputs) {
                pending += input.size();
            }
            for (int remaining = pending; remaining > 0; remaining--) {
                finished = !current.step.processRow(current.meta, current.data);
                if (current.step.getErrors() > 0) {
                    return false;
                }
            }
        } else {
            // No input row sets: the step produces data on its own (text, db, and so on), so it is
            // driven until it reports completion. This makes the engine impractical for large files.
            while (!finished && !trans.isStopped()) {
                finished = !current.step.processRow(current.meta, current.data);
                if (current.step.getErrors() > 0) {
                    return false;
                }
            }
        }

        // Tell the step that the batch for this iteration has passed (sort rows and all).
        current.step.batchComplete();
        done[idx] = finished;
        if (finished) {
            nrDone++;
        }
    }
    return nrDone < steps.size() && !trans.isStopped();
}
Also used : StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) RowSet(org.pentaho.di.core.RowSet)

Example 34 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

From the class Trans, method findMappingOutput.

/**
 * Collects every step whose step ID equals "MappingOutput" (ignoring case).
 *
 * @return an array of MappingOutputs; empty when there are no steps or none match
 */
public MappingOutput[] findMappingOutput() {
    if (steps == null) {
        return new MappingOutput[0];
    }
    List<MappingOutput> outputs = new ArrayList<>();
    // Walk the step list by index and keep the steps identified as mapping outputs.
    int position = 0;
    while (position < steps.size()) {
        StepInterface candidate = steps.get(position).step;
        if (candidate.getStepID().equalsIgnoreCase("MappingOutput")) {
            outputs.add((MappingOutput) candidate);
        }
        position++;
    }
    return outputs.toArray(new MappingOutput[0]);
}
Also used : StepInterface(org.pentaho.di.trans.step.StepInterface) ArrayList(java.util.ArrayList) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) MappingOutput(org.pentaho.di.trans.steps.mappingoutput.MappingOutput) KettleExtensionPoint(org.pentaho.di.core.extension.KettleExtensionPoint)

Example 35 with StepMetaDataCombi

use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.

From the class Trans, method findRunThread.

/**
 * Looks up the step whose name matches the given name, ignoring case.
 *
 * @param stepname
 *          the step name
 * @return the StepInterface of the first step with a matching name, or null when the step list
 *         is null or no step matches
 */
public StepInterface findRunThread(String stepname) {
    if (steps != null) {
        int position = 0;
        while (position < steps.size()) {
            StepInterface candidate = steps.get(position).step;
            if (candidate.getStepname().equalsIgnoreCase(stepname)) {
                return candidate;
            }
            position++;
        }
    }
    return null;
}
Also used : StepInterface(org.pentaho.di.trans.step.StepInterface) StepMetaDataCombi(org.pentaho.di.trans.step.StepMetaDataCombi) KettleExtensionPoint(org.pentaho.di.core.extension.KettleExtensionPoint)

Aggregations

StepMetaDataCombi (org.pentaho.di.trans.step.StepMetaDataCombi)55 StepInterface (org.pentaho.di.trans.step.StepInterface)21 KettleExtensionPoint (org.pentaho.di.core.extension.KettleExtensionPoint)18 Test (org.junit.Test)13 KettleException (org.pentaho.di.core.exception.KettleException)10 ArrayList (java.util.ArrayList)8 StepMeta (org.pentaho.di.trans.step.StepMeta)8 Trans (org.pentaho.di.trans.Trans)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)6 KettleValueException (org.pentaho.di.core.exception.KettleValueException)6 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)6 StepMetaInterface (org.pentaho.di.trans.step.StepMetaInterface)6 RowSet (org.pentaho.di.core.RowSet)5 UnknownParamException (org.pentaho.di.core.parameters.UnknownParamException)5 TransMeta (org.pentaho.di.trans.TransMeta)5 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 Before (org.junit.Before)4 KettleDatabaseException (org.pentaho.di.core.exception.KettleDatabaseException)4 KettleFileException (org.pentaho.di.core.exception.KettleFileException)4