Usage example of org.pentaho.di.core.RowSet in the pentaho-kettle project (by Pentaho).
From class Mapping, method processRow:
/**
 * Process a single row. In our case, we send one row of data to a piece of transformation. In the transformation, we
 * look up the MappingInput step to send our rows to it. As a consequence, for the time being, there can only be one
 * MappingInput and one MappingOutput step in the Mapping.
 *
 * @param smi the step meta interface; must be a MappingMeta
 * @param sdi the step data interface; must be a MappingData
 * @return true if the step wants to be called again, false when it is done
 * @throws KettleException when the mapping layout is unsupported, the transformation type is unknown,
 *                         or anything goes wrong while running the sub-transformation
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  try {
    meta = (MappingMeta) smi;
    setData((MappingData) sdi);
    // Locate the MappingInput / MappingOutput steps inside the running sub-transformation.
    MappingInput[] mappingInputs = getData().getMappingTrans().findMappingInput();
    MappingOutput[] mappingOutputs = getData().getMappingTrans().findMappingOutput();
    getData().wasStarted = true;
    switch(getData().mappingTransMeta.getTransformationType()) {
      case Normal:
      case SerialSingleThreaded:
        // Before we start, let's see if there are loose ends to tie up...
        // Hand this step's input row sets over to the single MappingInput step of the
        // sub-transformation so that upstream rows flow directly into the mapping.
        List<RowSet> inputRowSets = getInputRowSets();
        if (!inputRowSets.isEmpty()) {
          for (RowSet rowSet : inputRowSets) {
            if (mappingInputs.length == 1) {
              // Simple case: only one input mapping. Move the RowSet over
              mappingInputs[0].addRowSetToInputRowSets(rowSet);
            } else {
              // We cannot tell which Mapping Input step should receive this row set.
              throw new KettleException("Unsupported situation detected where more than one Mapping Input step needs to be handled. " + "To solve it, insert a dummy step before the mapping step.");
            }
          }
          clearInputRowSets();
        }
        // Do the same hand-over for remote (networked) input steps, if any.
        if (!getRemoteInputSteps().isEmpty()) {
          for (RemoteStep remoteStep : getRemoteInputSteps()) {
            if (mappingInputs.length == 1) {
              // Simple case: only one input mapping. Move the remote step over
              mappingInputs[0].getRemoteInputSteps().add(remoteStep);
            } else {
              // Ambiguous target: refuse rather than guess which input step gets the data.
              throw new KettleException("Unsupported situation detected where a remote input step is expecting data " + "to end up in a particular Mapping Input step of a sub-transformation. " + "To solve it, insert a dummy step before the mapping.");
            }
          }
          getRemoteInputSteps().clear();
        }
        // Do the same thing for output row sets
        List<RowSet> outputRowSets = getOutputRowSets();
        if (!outputRowSets.isEmpty()) {
          for (RowSet rowSet : outputRowSets) {
            if (mappingOutputs.length == 1) {
              // Simple case: only one output mapping. Move the RowSet over
              mappingOutputs[0].addRowSetToOutputRowSets(rowSet);
            } else {
              // We cannot tell which Mapping Output step should feed this row set.
              throw new KettleException("Unsupported situation detected where more than one Mapping Output step needs to be handled. " + "To solve it, insert a dummy step after the mapping step.");
            }
          }
          clearOutputRowSets();
        }
        // And for remote output steps as well.
        if (!getRemoteOutputSteps().isEmpty()) {
          for (RemoteStep remoteStep : getRemoteOutputSteps()) {
            if (mappingOutputs.length == 1) {
              // Simple case: only one output mapping. Move the remote step over
              mappingOutputs[0].getRemoteOutputSteps().add(remoteStep);
            } else {
              // Ambiguous source: refuse rather than guess which output step produces the data.
              throw new KettleException("Unsupported situation detected where a remote output step is expecting data " + "to end up in a particular Mapping Output step of a sub-transformation. " + "To solve it, insert a dummy step after the mapping.");
            }
          }
          getRemoteOutputSteps().clear();
        }
        // Start the mapping/sub-transformation threads
        getData().getMappingTrans().startThreads();
        // In the Normal (multi-threaded) case we block until the sub-transformation completes,
        // then copy its result counters onto this step.
        if (getTransMeta().getTransformationType() == TransformationType.Normal) {
          getData().getMappingTrans().waitUntilFinished();
          // Set some statistics from the mapping...
          // This will show up in Spoon, etc.
          Result result = getData().getMappingTrans().getResult();
          setErrors(result.getNrErrors());
          setLinesRead(result.getNrLinesRead());
          setLinesWritten(result.getNrLinesWritten());
          setLinesInput(result.getNrLinesInput());
          setLinesOutput(result.getNrLinesOutput());
          setLinesUpdated(result.getNrLinesUpdated());
          setLinesRejected(result.getNrLinesRejected());
        }
        // false: all the work was delegated to the sub-transformation; nothing more to do here.
        return false;
      case SingleThreaded:
        if (mappingInputs.length > 1 || mappingOutputs.length > 1) {
          throw new KettleException("Multiple input or output steps are not supported for a single threaded mapping.");
        }
        if ((log != null) && log.isDebug()) {
          List<RowSet> mappingInputRowSets = mappingInputs[0].getInputRowSets();
          log.logDebug("# of input buffers: " + mappingInputRowSets.size());
          if (mappingInputRowSets.size() > 0) {
            log.logDebug("Input buffer 0 size: " + mappingInputRowSets.get(0).size());
          }
        }
        // Now execute one batch: a single iteration of the single-threaded executor.
        // oneIteration() returns false once the sub-transformation has no more work.
        boolean result = getData().singleThreadedTransExcecutor.oneIteration();
        if (!result) {
          getData().singleThreadedTransExcecutor.dispose();
          setOutputDone();
          return false;
        }
        return true;
      default:
        throw new KettleException("Transformation type '" + getData().mappingTransMeta.getTransformationType().getDescription() + "' is an unsupported transformation type for a mapping");
    }
  } catch (Throwable t) {
    // On any failure, make sure the sub-transformation is stopped before propagating the error.
    if (getData().getMappingTrans() != null) {
      getData().getMappingTrans().stopAll();
    }
    // Wrap whatever happened in a KettleException so callers see a single exception type.
    throw new KettleException(t);
  }
}
Usage example of org.pentaho.di.core.RowSet in the pentaho-kettle project (by Pentaho).
From class BaseStep, method findOutputRowSet:
/**
 * Find an output rowset in a running transformation. It will also look at the "to" step to see if this is a mapping.
 * If it is, it will find the appropriate rowset in that transformation.
 *
 * @param from name of the originating step
 * @param fromcopy copy number of the originating step
 * @param to name of the destination step
 * @param tocopy copy number of the destination step
 * @return The rowset or null if none is found.
 */
public RowSet findOutputRowSet(String from, int fromcopy, String to, int tocopy) {
  // Phase 1: scan this step's own output row sets for an exact match on both endpoints.
  synchronized (outputRowSetsLock) {
    for (RowSet candidate : outputRowSets) {
      boolean originMatches = candidate.getOriginStepName().equalsIgnoreCase(from)
          && candidate.getOriginStepCopy() == fromcopy;
      boolean destinationMatches = candidate.getDestinationStepName().equalsIgnoreCase(to)
          && candidate.getDestinationStepCopy() == tocopy;
      if (originMatches && destinationMatches) {
        return candidate;
      }
    }
  }
  // Phase 2: the row set may have been handed over to a Mapping Input step inside a
  // sub-transformation. Look up the "to" step and check whether it is a mapping.
  StepMeta targetStepMeta = transMeta.findStep(to);
  if (targetStepMeta != null && targetStepMeta.isMapping()) {
    // In this case we can cast the step thread to a Mapping...
    List<StepInterface> stepThreads = trans.findBaseSteps(to);
    if (stepThreads.size() == 1) {
      Mapping mapping = (Mapping) stepThreads.get(0);
      // Search the input row sets of every Mapping Input step in the sub-transformation.
      // Only the origin step name is known at this level, so copies are not compared here.
      for (MappingInput input : mapping.getMappingTrans().findMappingInput()) {
        for (RowSet candidate : input.getInputRowSets()) {
          if (candidate.getOriginStepName().equalsIgnoreCase(from)) {
            return candidate;
          }
        }
      }
    }
  }
  // Nothing matched anywhere.
  return null;
}
Usage example of org.pentaho.di.core.RowSet in the pentaho-kettle project (by Pentaho).
From class BaseStep, method verifyInputDeadLock:
/**
 * Detects a mutual-wait (deadlock) situation between this reader step and an upstream step.
 * The check fires when: this step cannot read a new input row, it has at least one completely
 * full and one completely empty input row set, and some step located before this one in the
 * transformation shows the mirrored picture (all input buffers full, more than one output
 * row set with at least one full and one empty). If that pattern is found, an exception is
 * thrown naming both steps.
 *
 * @throws KettleStepException when a deadlock between this step and an upstream step is detected
 */
protected void verifyInputDeadLock() throws KettleStepException {
  // Step 1: this step must have at least one completely full AND one completely empty
  // input buffer; otherwise there is nothing suspicious going on.
  boolean haveFullInput = false;
  boolean haveEmptyInput = false;
  for (RowSet in : getInputRowSets()) {
    if (in.size() == transMeta.getSizeRowset()) {
      haveFullInput = true;
    } else if (in.size() == 0) {
      haveEmptyInput = true;
    }
  }
  if (!haveFullInput || !haveEmptyInput) {
    return;
  }
  // Step 2: look for a stalled step elsewhere in the transformation.
  for (StepMetaDataCombi combi : trans.getSteps()) {
    List<RowSet> candidateInputs = combi.step.getInputRowSets();
    int capacity = candidateInputs.size() * transMeta.getSizeRowset();
    int buffered = 0;
    for (RowSet in : candidateInputs) {
      buffered += in.size();
    }
    // All input buffers full probably means a stalled step; it must also split its
    // output across more than one row set for the deadlock pattern to apply.
    List<RowSet> candidateOutputs = combi.step.getOutputRowSets();
    if (buffered > 0 && buffered == capacity && candidateOutputs.size() > 1) {
      boolean haveFullOutput = false;
      boolean haveEmptyOutput = false;
      for (RowSet out : candidateOutputs) {
        if (out.size() == transMeta.getSizeRowset()) {
          haveFullOutput = true;
        } else if (out.size() == 0) {
          haveEmptyOutput = true;
        }
      }
      if (haveFullOutput && haveEmptyOutput) {
        // Only a step situated BEFORE this one can deadlock with it.
        if (transMeta.findPrevious(stepMeta, combi.stepMeta)) {
          throw new KettleStepException("A deadlock was detected between steps '" + combi.stepname + "' and '" + stepname + "'. The steps are both waiting for each other because a series of row set buffers filled up.");
        }
      }
    }
  }
}
Usage example of org.pentaho.di.core.RowSet in the pentaho-kettle project (by Pentaho).
From class BaseStep, method openRemoteOutputStepSocketsOnce:
/**
 * Opens socket connections to the remote output steps of this step. <br>
 * This method is called in method initBeforeStart() because it needs to connect to the server sockets (remote steps)
 * as soon as possible to avoid time-out situations. <br>
 * This action is executed only once.
 *
 * @throws KettleStepException when the slave server name variable is undefined, a remote step
 *                             has no target slave server, or a writer socket cannot be opened
 */
protected void openRemoteOutputStepSocketsOnce() throws KettleStepException {
  if (!remoteOutputSteps.isEmpty()) {
    if (!remoteOutputStepsInitialized) {
      synchronized (outputRowSetsLock) {
        // Look the slave server name up once (the original resolved it twice per row set)
        // and validate it BEFORE tagging any row set, instead of first assigning a null
        // name and only then throwing. As before, the check only applies when there is
        // at least one local output row set to tag.
        String slaveServerName = getVariable(Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME);
        if (!outputRowSets.isEmpty() && slaveServerName == null) {
          throw new KettleStepException("Variable '" + Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME + "' is not defined.");
        }
        // Mark every local output row set with the slave server this step runs on.
        for (RowSet rowSet : outputRowSets) {
          rowSet.setRemoteSlaveServerName(slaveServerName);
        }
        // Open a writer socket to each remote output step and register the resulting
        // row set as an additional output of this step.
        for (RemoteStep remoteStep : remoteOutputSteps) {
          try {
            if (remoteStep.getTargetSlaveServerName() == null) {
              throw new KettleStepException("The target slave server name is not defined for remote output step: " + remoteStep);
            }
            BlockingRowSet rowSet = remoteStep.openWriterSocket();
            if (log.isDetailed()) {
              logDetailed(BaseMessages.getString(PKG, "BaseStep.Log.OpenedWriterSocketToRemoteStep", remoteStep));
            }
            outputRowSets.add(rowSet);
          } catch (IOException e) {
            throw new KettleStepException("Error opening writer socket to remote step '" + remoteStep + "'", e);
          }
        }
      }
      remoteOutputStepsInitialized = true;
    }
  }
}
Usage example of org.pentaho.di.core.RowSet in the pentaho-kettle project (by Pentaho).
From class FuzzyMatch, method readLookupValues:
/**
 * Reads every row from the lookup (info) stream and stores the lookup key, plus any
 * additional return fields, in this step's cache. On the first row the field indexes are
 * resolved and the cache row metadata is built; all cached values are converted to normal
 * (non-binary) storage so they can be compared later.
 *
 * @return true when all lookup rows were read and cached, false when no lookup step was specified
 * @throws KettleException when the key field or one of the additional fields cannot be found
 */
private boolean readLookupValues() throws KettleException {
  data.infoStream = meta.getStepIOMeta().getInfoStreams().get(0);
  if (data.infoStream.getStepMeta() == null) {
    logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.NoLookupStepSpecified"));
    return false;
  }
  if (isDetailed()) {
    logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadingFromStream") + data.infoStream.getStepname() + "]");
  }
  boolean firstRun = true;
  // Which row set do we read from?
  RowSet rowSet = findInputRowSet(data.infoStream.getStepname());
  // Rows are originating from the lookup step.
  Object[] rowData = getRowFrom(rowSet);
  while (rowData != null) {
    if (firstRun) {
      // First row only: resolve field indexes and build the cache row metadata.
      data.infoMeta = rowSet.getRowMeta().clone();
      // Check lookup field
      int indexOfLookupField = data.infoMeta.indexOfValue(environmentSubstitute(meta.getLookupField()));
      if (indexOfLookupField < 0) {
        // The key field is unreachable!
        throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", meta.getLookupField()));
      }
      data.infoCache = new RowMeta();
      ValueMetaInterface keyValueMeta = data.infoMeta.getValueMeta(indexOfLookupField);
      // Cached values are stored in normal storage, never as binary strings.
      keyValueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
      data.infoCache.addValueMeta(keyValueMeta);
      // The key always occupies slot 0 of the cached row.
      data.indexOfCachedFields[0] = indexOfLookupField;
      // Resolve any additional return fields requested in the step settings.
      if (data.addAdditionalFields) {
        for (int i = 0; i < meta.getValue().length; i++) {
          int fi = i + 1;
          data.indexOfCachedFields[fi] = data.infoMeta.indexOfValue(meta.getValue()[i]);
          if (data.indexOfCachedFields[fi] < 0) {
            // The field is unreachable!
            throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", meta.getValue()[i]));
          }
          ValueMetaInterface additionalFieldValueMeta = data.infoMeta.getValueMeta(data.indexOfCachedFields[fi]);
          additionalFieldValueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
          data.infoCache.addValueMeta(additionalFieldValueMeta);
        }
        data.nrCachedFields += meta.getValue().length;
      }
      firstRun = false;
    }
    if (log.isRowLevel()) {
      logRowlevel(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadLookupRow") + rowSet.getRowMeta().getString(rowData));
    }
    // Copy key + additional fields into a cache row, normalizing binary-string storage.
    Object[] storeData = new Object[data.nrCachedFields];
    // A null key is cached as an empty string (null is never converted).
    if (rowData[data.indexOfCachedFields[0]] == null) {
      storeData[0] = "";
    } else {
      storeData[0] = normalizeStorage(rowSet, rowData, data.indexOfCachedFields[0]);
    }
    // Additional fields keep their value as-is, only de-binarized when needed.
    for (int i = 1; i < data.nrCachedFields; i++) {
      storeData[i] = normalizeStorage(rowSet, rowData, data.indexOfCachedFields[i]);
    }
    if (isDebug()) {
      logDebug(BaseMessages.getString(PKG, "FuzzyMatch.Log.AddingValueToCache", data.infoCache.getString(storeData)));
    }
    addToCache(storeData);
    rowData = getRowFrom(rowSet);
  }
  return true;
}

/**
 * Returns the value at the given field index, converted from binary-string storage to
 * normal storage when the field is stored as a binary string, unchanged otherwise.
 */
private static Object normalizeStorage(RowSet rowSet, Object[] rowData, int index) throws KettleException {
  ValueMetaInterface valueMeta = rowSet.getRowMeta().getValueMeta(index);
  Object value = rowData[index];
  return valueMeta.isStorageBinaryString() ? valueMeta.convertToNormalStorageType(value) : value;
}
Aggregations