use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class SingleThreadedTransExecutor method sortSteps.
/**
 * Sort the steps from start to finish...
 * <p>
 * Reorders the {@code steps} list in place with a cocktail sort (bi-directional bubble sort),
 * using {@code TransMeta.findPrevious()} on each adjacent pair as the ordering test. A diagnostic
 * report of the run is assembled along the way and written at "detailed" log level.
 */
private void sortSteps() {
  // The bubble sort algorithm in contrast to the QuickSort or MergeSort
  // algorithms does indeed cover all possibilities.
  // Sorting larger transformations with hundreds of steps might be too slow
  // though.
  // We should consider caching TransMeta.findPrevious() results in that case.
  //
  trans.getTransMeta().clearCaches();

  //
  // Cocktail sort (bi-directional bubble sort)
  //
  // Original sort was taking 3ms for 30 steps
  // cocktail sort takes about 8ms for the same 30, but it works :)

  // Set this to true if you are working on this algorithm and don't like
  // flying blind: it adds the reason for leaving the outer loop to the log.
  //
  boolean testing = true;

  // [stepsMinSize, stepsSize) is the window of list positions the inner passes still visit.
  int stepsMinSize = 0;
  int stepsSize = steps.size();

  // Noticed a problem with an immediate shrinking iteration window
  // trapping rows that need to be sorted.
  // This threshold buys us some time to get the sorting close before
  // starting to decrease the window size.
  //
  // TODO: this could become much smarter by tracking row movement
  // and reacting to that each outer iteration verses
  // using a threshold.
  //
  // After this many iterations enable trimming inner iteration
  // window on no change being detected.
  //
  int windowShrinkThreshold = (int) Math.round(stepsSize * 0.75);

  // give ourselves some room to sort big lists. the window threshold should
  // stop us before reaching this anyway.
  //
  int totalIterations = stepsSize * 2;
  int actualIterations = 0;

  boolean isBefore = false;
  boolean forwardChange = false;
  boolean backwardChange = false;
  boolean lastForwardChange = true;
  boolean keepSortingForward = true;

  StepMetaDataCombi one = null;
  StepMetaDataCombi two = null;

  // Collecting everything in one buffer helps group our output so other threads
  // don't get logs in our output.
  StringBuilder tLogString = new StringBuilder();
  tLogString.append("-------------------------------------------------------").append("\n");
  tLogString.append("--SingleThreadedTransExecutor.sortSteps(cocktail)").append("\n");
  tLogString.append("--Trans: ").append(trans.getName()).append("\n");
  tLogString.append("-").append("\n");

  long startTime = System.currentTimeMillis();

  for (int x = 0; x < totalIterations; x++) {

    // Forward pass: walk the window left to right, swapping neighbours that are out of order.
    // It is skipped once two consecutive outer iterations saw no forward swap (see below).
    //
    if (keepSortingForward) {
      for (int y = stepsMinSize; y < stepsSize - 1; y++) {
        one = steps.get(y);
        two = steps.get(y + 1);
        isBefore = trans.getTransMeta().findPrevious(one.stepMeta, two.stepMeta);
        if (isBefore) {
          // two was found to be positioned BEFORE one so we need to
          // switch them...
          //
          steps.set(y, two);
          steps.set(y + 1, one);
          forwardChange = true;
        }
      }
    }

    // Backward pass: walk the window right to left.
    //
    for (int z = stepsSize - 1; z > stepsMinSize; z--) {
      one = steps.get(z);
      two = steps.get(z - 1);
      isBefore = trans.getTransMeta().findPrevious(one.stepMeta, two.stepMeta);
      if (!isBefore) {
        // two was found NOT to be positioned BEFORE one so we need to
        // switch them...
        //
        steps.set(z, two);
        steps.set(z - 1, one);
        backwardChange = true;
      }
    }

    // Past the threshold, a pass without swaps trims the upper end of the window.
    //
    if (x > windowShrinkThreshold && !forwardChange) {
      // should we keep going? check the window size
      //
      stepsSize--;
      if (stepsSize <= stepsMinSize) {
        if (testing) {
          // Terminate the size line with a newline so it does not run into the next message.
          tLogString.append(String.format("stepsMinSize:%s stepsSize:%s", stepsMinSize, stepsSize)).append("\n");
          tLogString.append("stepsSize is <= stepsMinSize.. exiting outer sort loop. index:" + x).append("\n");
        }
        break;
      }
    }

    // Same idea for the lower end of the window.
    //
    if (x > windowShrinkThreshold && !backwardChange) {
      // should we keep going? check the window size
      //
      stepsMinSize++;
      if (stepsMinSize >= stepsSize) {
        if (testing) {
          tLogString.append(String.format("stepsMinSize:%s stepsSize:%s", stepsMinSize, stepsSize)).append("\n");
          tLogString.append("stepsMinSize is >= stepsSize.. exiting outer sort loop. index:" + x).append("\n");
        }
        break;
      }
    }

    // End of both forward and backward traversal.
    // Time to see if we should keep going.
    //
    actualIterations++;

    if (!forwardChange && !backwardChange) {
      if (testing) {
        tLogString.append(String.format("existing outer loop because no " + "change was detected going forward or backward. index:%s min:%s max:%s", x, stepsMinSize, stepsSize)).append("\n");
      }
      break;
    }

    // Two consecutive outer iterations without a forward swap: stop doing forward passes.
    //
    if (keepSortingForward && x > 0 && !lastForwardChange && !forwardChange) {
      keepSortingForward = false;
    }
    lastForwardChange = forwardChange;
    forwardChange = false;
    backwardChange = false;
  }

  // finished sorting
  long endTime = System.currentTimeMillis();
  long totalTime = (endTime - startTime);

  tLogString.append("-------------------------------------------------------").append("\n");
  tLogString.append("Steps sort time: " + totalTime + "ms").append("\n");
  tLogString.append("Total iterations: " + actualIterations).append("\n");
  tLogString.append("Step count: " + steps.size()).append("\n");
  tLogString.append("Steps after sort: ").append("\n");
  for (StepMetaDataCombi combi : steps) {
    tLogString.append(combi.step.getStepname()).append("\n");
  }
  tLogString.append("-------------------------------------------------------").append("\n");

  if (log.isDetailed()) {
    log.logDetailed(tLogString.toString());
  }
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class SingleThreadedTransExecutor method init.
/**
 * Verifies that every step can run in the single threaded engine and then initializes the steps.
 *
 * @return true when every step's {@code init} call returned true, false as soon as one returns false
 * @throws KettleException when a step does not list {@code TransformationType.SingleThreaded}
 *         among its supported transformation types
 */
public boolean init() throws KettleException {
  // Pass 1: refuse to start when any step lacks single threaded support.
  //
  for (StepMetaDataCombi candidate : steps) {
    boolean singleThreadedCapable = false;
    for (TransformationType supported : candidate.stepMeta.getStepMetaInterface().getSupportedTransformationTypes()) {
      if (supported == TransformationType.SingleThreaded) {
        singleThreadedCapable = true;
        break;
      }
    }
    if (!singleThreadedCapable) {
      throw new KettleException("Step '" + candidate.stepname + "' of type '" + candidate.stepMeta.getStepID() + "' is not yet supported in a Single Threaded transformation engine.");
    }
  }

  // Pass 2: initialize the steps in list order, giving up on the first failure.
  //
  for (StepMetaDataCombi candidate : steps) {
    if (!candidate.step.init(candidate.meta, candidate.data)) {
      return false;
    }
  }
  return true;
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class SingleThreadedTransExecutor method oneIteration.
/**
 * Give all steps in the transformation the chance to process all rows on input...
 * <p>
 * Steps already flagged in {@code done} are skipped. The pass stops early when the
 * transformation is stopped, and aborts with {@code false} as soon as a step reports errors.
 *
 * @return true if more iterations can be performed. False if this is not the case.
 * @throws KettleException propagated from the step calls made during the iteration
 */
public boolean oneIteration() throws KettleException {
  for (int stepNr = 0; stepNr < steps.size() && !trans.isStopped(); stepNr++) {
    if (done[stepNr]) {
      continue;
    }

    StepMetaDataCombi current = steps.get(stepNr);

    // If this step is waiting for data (text, db, and so on), we simply read all the data
    // This means that it is impractical to use this transformation type to load large files.
    //
    boolean finished = false;

    // Info row sets first: call processRow() at least once per info row set, then keep
    // going while that row set still holds rows and the step has not signalled completion.
    //
    for (RowSet infoRowSet : stepInfoRowSets.get(stepNr)) {
      do {
        finished = !current.step.processRow(current.meta, current.data);
        if (current.step.getErrors() > 0) {
          return false;
        }
      } while (infoRowSet.size() > 0 && !finished);
    }

    // Do normal processing of input rows...
    //
    List<RowSet> inputRowSets = current.step.getInputRowSets();
    if (inputRowSets.isEmpty()) {
      // No input row sets: keep calling the step until it is done or the transformation stops.
      //
      while (!finished && !trans.isStopped()) {
        finished = !current.step.processRow(current.meta, current.data);
        if (current.step.getErrors() > 0) {
          return false;
        }
      }
    } else {
      // Since we can't be sure that the step actually reads from the row sets where we measure rows,
      // we simply count the total nr of rows on input. The steps will find the rows in either row set.
      //
      int pendingRows = 0;
      for (RowSet inputRowSet : inputRowSets) {
        pendingRows += inputRowSet.size();
      }

      // One processRow() call per row counted above.
      //
      for (int remaining = pendingRows; remaining > 0; remaining--) {
        finished = !current.step.processRow(current.meta, current.data);
        if (current.step.getErrors() > 0) {
          return false;
        }
      }
    }

    // Signal the step that a batch of rows has passed for this iteration (sort rows and all)
    //
    current.step.batchComplete();

    done[stepNr] = finished;
    if (finished) {
      nrDone++;
    }
  }

  return nrDone < steps.size() && !trans.isStopped();
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class Trans method findMappingOutput.
/**
 * Collects every step whose step ID is "MappingOutput" (compared case-insensitively).
 *
 * @return an array of MappingOutputs; empty when there are no steps or none match
 */
public MappingOutput[] findMappingOutput() {
  List<MappingOutput> outputs = new ArrayList<>();
  if (steps == null) {
    return outputs.toArray(new MappingOutput[0]);
  }

  // Walk the step list by index and keep the steps identified as mapping outputs.
  for (int idx = 0; idx < steps.size(); idx++) {
    StepInterface candidate = steps.get(idx).step;
    if (candidate.getStepID().equalsIgnoreCase("MappingOutput")) {
      outputs.add((MappingOutput) candidate);
    }
  }
  return outputs.toArray(new MappingOutput[0]);
}
use of org.pentaho.di.trans.step.StepMetaDataCombi in project pentaho-kettle by pentaho.
the class Trans method findRunThread.
/**
 * Looks up the step whose name equals the given name, ignoring case.
 *
 * @param stepname
 *          the step name
 * @return the StepInterface of the first matching step, or null when there are no steps or no
 *         step carries that name
 */
public StepInterface findRunThread(String stepname) {
  if (steps != null) {
    for (int idx = 0; idx < steps.size(); idx++) {
      StepInterface candidate = steps.get(idx).step;
      if (candidate.getStepname().equalsIgnoreCase(stepname)) {
        return candidate;
      }
    }
  }
  return null;
}
Aggregations