Search in sources :

Example 61 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class Mapping method processRow.

/**
 * Drives the mapping (sub-transformation) that this step wraps.
 *
 * <p>For {@code Normal} and {@code SerialSingleThreaded} mappings, the input/output row sets and the remote
 * input/output steps of this step are handed over to the single Mapping Input / Mapping Output step of the
 * sub-transformation, after which the sub-transformation threads are started. Handing over is only supported when
 * there is exactly one Mapping Input (respectively Mapping Output) step; otherwise a {@link KettleException} is
 * raised. For {@code SingleThreaded} mappings one iteration of the single threaded executor is run per call.
 *
 * @param smi the step metadata; cast to {@code MappingMeta}
 * @param sdi the step data; cast to {@code MappingData}
 * @return {@code false} once the Normal/SerialSingleThreaded hand-over is done or when the single threaded executor
 *         reports that it has finished; {@code true} while the single threaded executor has more work
 * @throws KettleException wrapping any {@link Throwable} raised while processing; the mapping transformation is
 *         stopped first when one is available
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    try {
        meta = (MappingMeta) smi;
        setData((MappingData) sdi);
        MappingInput[] mappingInputs = getData().getMappingTrans().findMappingInput();
        MappingOutput[] mappingOutputs = getData().getMappingTrans().findMappingOutput();
        getData().wasStarted = true;
        switch(getData().mappingTransMeta.getTransformationType()) {
            case Normal:
            case SerialSingleThreaded:
                // Before we start, let's see if there are loose ends to tie up:
                // move every input row set of this step over to the Mapping Input step.
                List<RowSet> inputRowSets = getInputRowSets();
                if (!inputRowSets.isEmpty()) {
                    for (RowSet rowSet : inputRowSets) {
                        if (mappingInputs.length == 1) {
                            // Simple case: only one input mapping. Move the RowSet over
                            mappingInputs[0].addRowSetToInputRowSets(rowSet);
                        } else {
                            // Zero or several Mapping Input steps: we cannot tell where the rows should go.
                            throw new KettleException("Unsupported situation detected where more than one Mapping Input step needs to be handled.  " + "To solve it, insert a dummy step before the mapping step.");
                        }
                    }
                    clearInputRowSets();
                }
                // Same hand-over for the remote input steps.
                if (!getRemoteInputSteps().isEmpty()) {
                    for (RemoteStep remoteStep : getRemoteInputSteps()) {
                        if (mappingInputs.length == 1) {
                            // Simple case: only one input mapping. Move the remote step over
                            mappingInputs[0].getRemoteInputSteps().add(remoteStep);
                        } else {
                            throw new KettleException("Unsupported situation detected where a remote input step is expecting data " + "to end up in a particular Mapping Input step of a sub-transformation.  " + "To solve it, insert a dummy step before the mapping.");
                        }
                    }
                    getRemoteInputSteps().clear();
                }
                // Do the same thing for output row sets
                List<RowSet> outputRowSets = getOutputRowSets();
                if (!outputRowSets.isEmpty()) {
                    for (RowSet rowSet : outputRowSets) {
                        if (mappingOutputs.length == 1) {
                            // Simple case: only one output mapping. Move the RowSet over
                            mappingOutputs[0].addRowSetToOutputRowSets(rowSet);
                        } else {
                            throw new KettleException("Unsupported situation detected where more than one Mapping Output step needs to be handled.  " + "To solve it, insert a dummy step after the mapping step.");
                        }
                    }
                    clearOutputRowSets();
                }
                // ... and for the remote output steps.
                if (!getRemoteOutputSteps().isEmpty()) {
                    for (RemoteStep remoteStep : getRemoteOutputSteps()) {
                        if (mappingOutputs.length == 1) {
                            // Simple case: only one output mapping. Move the remote step over
                            mappingOutputs[0].getRemoteOutputSteps().add(remoteStep);
                        } else {
                            throw new KettleException("Unsupported situation detected where a remote output step is expecting data " + "to end up in a particular Mapping Output step of a sub-transformation.  " + "To solve it, insert a dummy step after the mapping.");
                        }
                    }
                    getRemoteOutputSteps().clear();
                }
                // Start the mapping/sub-transformation threads
                getData().getMappingTrans().startThreads();
                // NOTE(review): this tests the type of the transformation returned by getTransMeta(), not the
                // mappingTransMeta used in the switch above - presumably intentional; confirm.
                if (getTransMeta().getTransformationType() == TransformationType.Normal) {
                    getData().getMappingTrans().waitUntilFinished();
                    // Set some statistics from the mapping...
                    // This will show up in Spoon, etc.
                    Result result = getData().getMappingTrans().getResult();
                    setErrors(result.getNrErrors());
                    setLinesRead(result.getNrLinesRead());
                    setLinesWritten(result.getNrLinesWritten());
                    setLinesInput(result.getNrLinesInput());
                    setLinesOutput(result.getNrLinesOutput());
                    setLinesUpdated(result.getNrLinesUpdated());
                    setLinesRejected(result.getNrLinesRejected());
                }
                return false;
            case SingleThreaded:
                if (mappingInputs.length > 1 || mappingOutputs.length > 1) {
                    throw new KettleException("Multiple input or output steps are not supported for a single threaded mapping.");
                }
                // Only inspect the first Mapping Input step when there is one: the check above allows a mapping
                // without any Mapping Input step, and debug logging must not make such a mapping fail.
                if ((log != null) && log.isDebug() && mappingInputs.length > 0) {
                    List<RowSet> mappingInputRowSets = mappingInputs[0].getInputRowSets();
                    log.logDebug("# of input buffers: " + mappingInputRowSets.size());
                    if (mappingInputRowSets.size() > 0) {
                        log.logDebug("Input buffer 0 size: " + mappingInputRowSets.get(0).size());
                    }
                }
                // Now execute one batch...Basic logging
                boolean result = getData().singleThreadedTransExcecutor.oneIteration();
                if (!result) {
                    // Nothing left to do: release the executor and signal the end of our output.
                    getData().singleThreadedTransExcecutor.dispose();
                    setOutputDone();
                    return false;
                }
                return true;
            default:
                throw new KettleException("Transformation type '" + getData().mappingTransMeta.getTransformationType().getDescription() + "' is an unsupported transformation type for a mapping");
        }
    } catch (Throwable t) {
        // Stop the sub-transformation if we got far enough to have one. The data object itself is checked too so
        // that a failure before it was set does not hide the original problem behind a NullPointerException.
        if (getData() != null && getData().getMappingTrans() != null) {
            getData().getMappingTrans().stopAll();
        }
        // Report the original problem to the caller.
        throw new KettleException(t);
    }
}
Also used : MappingInput(org.pentaho.di.trans.steps.mappinginput.MappingInput) RemoteStep(org.pentaho.di.trans.step.RemoteStep) KettleException(org.pentaho.di.core.exception.KettleException) RowSet(org.pentaho.di.core.RowSet) MappingOutput(org.pentaho.di.trans.steps.mappingoutput.MappingOutput) Result(org.pentaho.di.core.Result)

Example 62 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method findOutputRowSet.

/**
 * Looks up an output row set of this step by origin/destination step name (case-insensitive) and copy number.
 * When no direct match exists and the "to" step is a mapping with exactly one running step instance, the input
 * row sets of the Mapping Input steps inside that mapping are searched for one originating from {@code from}.
 *
 * @param from name of the origin step
 * @param fromcopy copy number of the origin step
 * @param to name of the destination step
 * @param tocopy copy number of the destination step
 * @return The rowset or null if none is found.
 */
public RowSet findOutputRowSet(String from, int fromcopy, String to, int tocopy) {
    // First try the row sets this step writes to directly.
    synchronized (outputRowSetsLock) {
        for (RowSet candidate : outputRowSets) {
            boolean sameSteps = candidate.getOriginStepName().equalsIgnoreCase(from) && candidate.getDestinationStepName().equalsIgnoreCase(to);
            if (sameSteps && candidate.getOriginStepCopy() == fromcopy && candidate.getDestinationStepCopy() == tocopy) {
                return candidate;
            }
        }
    }

    // No direct hit: the row set may have been handed over to a mapping target step.
    StepMeta targetMeta = transMeta.findStep(to);
    if (targetMeta == null || !targetMeta.isMapping()) {
        return null;
    }

    // Only a single running instance of the target step is handled.
    List<StepInterface> targetSteps = trans.findBaseSteps(to);
    if (targetSteps.size() != 1) {
        return null;
    }

    // The target is a mapping, so the step thread can be cast to a Mapping. Search the row sets that were
    // passed over to the Mapping Input step(s) inside the mapping transformation; here only the origin step
    // name is compared.
    Mapping targetMapping = (Mapping) targetSteps.get(0);
    MappingInput[] mappingInputs = targetMapping.getMappingTrans().findMappingInput();
    for (MappingInput mappingInput : mappingInputs) {
        for (RowSet candidate : mappingInput.getInputRowSets()) {
            if (candidate.getOriginStepName().equalsIgnoreCase(from)) {
                return candidate;
            }
        }
    }
    return null;
}
Also used : MappingInput(org.pentaho.di.trans.steps.mappinginput.MappingInput) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) Mapping(org.pentaho.di.trans.steps.mapping.Mapping)

Example 63 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method verifyInputDeadLock.

/**
 * Checks for a dead lock caused by row set buffers that filled up.
 *
 * <p>The check only goes ahead when this step has at least one input row set whose size equals the configured
 * row set size ("full") and at least one other input row set of size zero ("empty"). It then looks at every step
 * of the transformation: a step whose input row sets are all full, that has more than one output row set, and
 * whose output row sets contain both a full and an empty one is considered stalled. If
 * {@code transMeta.findPrevious(stepMeta, combi.stepMeta)} reports true for such a step, an exception is thrown.
 *
 * @throws KettleStepException when the situation described above is found
 */
protected void verifyInputDeadLock() throws KettleStepException {
    if (!hasFullAndEmptyRowSet(getInputRowSets())) {
        return;
    }
    for (StepMetaDataCombi combi : trans.getSteps()) {
        List<RowSet> sourceRowSets = combi.step.getInputRowSets();
        int capacity = sourceRowSets.size() * transMeta.getSizeRowset();
        int buffered = 0;
        for (RowSet sourceRowSet : sourceRowSets) {
            buffered += sourceRowSet.size();
        }
        // All full probably means a stalled step.
        List<RowSet> targetRowSets = combi.step.getOutputRowSets();
        boolean stalledCandidate = buffered > 0 && buffered == capacity && targetRowSets.size() > 1;
        if (!stalledCandidate) {
            continue;
        }
        if (hasFullAndEmptyRowSet(targetRowSets) && transMeta.findPrevious(stepMeta, combi.stepMeta)) {
            throw new KettleStepException("A deadlock was detected between steps '" + combi.stepname + "' and '" + stepname + "'.  The steps are both waiting for each other because a series of row set buffers filled up.");
        }
    }
}

/**
 * Tells whether the given row sets contain at least one full row set (size equal to the configured row set size)
 * and at least one empty row set (size zero, and not counted as full).
 */
private boolean hasFullAndEmptyRowSet(List<RowSet> rowSets) {
    boolean sawFull = false;
    boolean sawEmpty = false;
    for (RowSet rowSet : rowSets) {
        if (rowSet.size() == transMeta.getSizeRowset()) {
            sawFull = true;
        } else if (rowSet.size() == 0) {
            sawEmpty = true;
        }
    }
    return sawFull && sawEmpty;
}
Also used : KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet)

Example 64 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method openRemoteOutputStepSocketsOnce.

/**
 * Opens socket connections to the remote output steps of this step. <br>
 * This method is called in method initBeforeStart() because it needs to connect to the server sockets (remote steps)
 * as soon as possible to avoid time-out situations. <br>
 * This action is executed only once: after a successful run the {@code remoteOutputStepsInitialized} flag is set and
 * later calls return immediately. Nothing happens either when there are no remote output steps.
 *
 * <p>Before the sockets are opened, every existing output row set is labelled with the slave server name taken from
 * the {@code Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME} variable. The variable is validated before any row set is
 * touched, so a missing variable no longer leaves a row set with a {@code null} slave server name.
 *
 * @throws KettleStepException when there are output row sets but the slave server name variable is not defined,
 *         when a remote output step has no target slave server name, or when a writer socket can not be opened
 */
protected void openRemoteOutputStepSocketsOnce() throws KettleStepException {
    if (remoteOutputSteps.isEmpty() || remoteOutputStepsInitialized) {
        return;
    }
    synchronized (outputRowSetsLock) {
        // Label the row sets we already have with the name of the slave server we run on.
        // The variable is only required (and only resolved, once) when there is something to label.
        if (!outputRowSets.isEmpty()) {
            String slaveServerName = getVariable(Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME);
            if (slaveServerName == null) {
                throw new KettleStepException("Variable '" + Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME + "' is not defined.");
            }
            for (RowSet rowSet : outputRowSets) {
                rowSet.setRemoteSlaveServerName(slaveServerName);
            }
        }
        // Open one writer socket per remote output step and register it as an additional output row set.
        for (RemoteStep remoteStep : remoteOutputSteps) {
            try {
                if (remoteStep.getTargetSlaveServerName() == null) {
                    throw new KettleStepException("The target slave server name is not defined for remote output step: " + remoteStep);
                }
                BlockingRowSet rowSet = remoteStep.openWriterSocket();
                if (log.isDetailed()) {
                    logDetailed(BaseMessages.getString(PKG, "BaseStep.Log.OpenedWriterSocketToRemoteStep", remoteStep));
                }
                outputRowSets.add(rowSet);
            } catch (IOException e) {
                throw new KettleStepException("Error opening writer socket to remote step '" + remoteStep + "'", e);
            }
        }
    }
    remoteOutputStepsInitialized = true;
}
Also used : KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) IOException(java.io.IOException)

Example 65 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class FuzzyMatch method readLookupValues.

/**
 * Reads all rows from the info (lookup) stream and stores the lookup key, plus the configured additional fields
 * when {@code data.addAdditionalFields} is set, in the cache through {@code addToCache}.
 *
 * <p>On the first row the lookup row metadata is cloned and the positions of the cached fields are resolved. A
 * {@code null} key value is cached as an empty string; values stored as binary strings are converted to normal
 * storage before they are cached.
 *
 * @return {@code false} when the info stream has no step attached, {@code true} after all rows were read
 * @throws KettleException when the lookup field or one of the additional fields can not be found in the lookup rows
 */
private boolean readLookupValues() throws KettleException {
    data.infoStream = meta.getStepIOMeta().getInfoStreams().get(0);
    if (data.infoStream.getStepMeta() == null) {
        logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.NoLookupStepSpecified"));
        return false;
    }
    if (isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadingFromStream") + data.infoStream.getStepname() + "]");
    }

    // The row set that carries the lookup rows.
    RowSet lookupRowSet = findInputRowSet(data.infoStream.getStepname());
    boolean needsSetup = true;

    for (Object[] lookupRow = getRowFrom(lookupRowSet); lookupRow != null; lookupRow = getRowFrom(lookupRowSet)) {
        if (needsSetup) {
            data.infoMeta = lookupRowSet.getRowMeta().clone();

            // Locate the lookup (key) field; it must exist.
            int keyFieldIndex = data.infoMeta.indexOfValue(environmentSubstitute(meta.getLookupField()));
            if (keyFieldIndex < 0) {
                throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", meta.getLookupField()));
            }

            // The cache layout starts with the key field, kept in normal storage.
            data.infoCache = new RowMeta();
            ValueMetaInterface keyFieldMeta = data.infoMeta.getValueMeta(keyFieldIndex);
            keyFieldMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
            data.infoCache.addValueMeta(keyFieldMeta);
            data.indexOfCachedFields[0] = keyFieldIndex;

            // Optionally followed by the additional fields, in configuration order.
            if (data.addAdditionalFields) {
                for (int i = 0; i < meta.getValue().length; i++) {
                    int cacheSlot = i + 1;
                    data.indexOfCachedFields[cacheSlot] = data.infoMeta.indexOfValue(meta.getValue()[i]);
                    if (data.indexOfCachedFields[cacheSlot] < 0) {
                        throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", meta.getValue()[i]));
                    }
                    ValueMetaInterface extraFieldMeta = data.infoMeta.getValueMeta(data.indexOfCachedFields[cacheSlot]);
                    extraFieldMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
                    data.infoCache.addValueMeta(extraFieldMeta);
                }
                data.nrCachedFields += meta.getValue().length;
            }
        }

        if (log.isRowLevel()) {
            logRowlevel(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadLookupRow") + lookupRowSet.getRowMeta().getString(lookupRow));
        }

        // Build the row that goes into the cache.
        Object[] cachedRow = new Object[data.nrCachedFields];

        // Slot 0 holds the key; a missing key is stored as an empty string.
        int keyIndex = data.indexOfCachedFields[0];
        if (lookupRow[keyIndex] == null) {
            cachedRow[0] = "";
        } else {
            ValueMetaInterface keySourceMeta = lookupRowSet.getRowMeta().getValueMeta(keyIndex);
            cachedRow[0] = keySourceMeta.isStorageBinaryString() ? keySourceMeta.convertToNormalStorageType(lookupRow[keyIndex]) : lookupRow[keyIndex];
        }

        // Remaining slots hold the additional fields, if any.
        for (int slot = 1; slot < data.nrCachedFields; slot++) {
            int sourceIndex = data.indexOfCachedFields[slot];
            ValueMetaInterface sourceMeta = lookupRowSet.getRowMeta().getValueMeta(sourceIndex);
            cachedRow[slot] = sourceMeta.isStorageBinaryString() ? sourceMeta.convertToNormalStorageType(lookupRow[sourceIndex]) : lookupRow[sourceIndex];
        }

        if (isDebug()) {
            logDebug(BaseMessages.getString(PKG, "FuzzyMatch.Log.AddingValueToCache", data.infoCache.getString(cachedRow)));
        }
        addToCache(cachedRow);

        // The one-time setup is done after the first row.
        needsSetup = false;
    }
    return true;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RowMeta(org.pentaho.di.core.row.RowMeta) RowSet(org.pentaho.di.core.RowSet) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Aggregations

RowSet (org.pentaho.di.core.RowSet)109 Test (org.junit.Test)43 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)40 RowMeta (org.pentaho.di.core.row.RowMeta)34 QueueRowSet (org.pentaho.di.core.QueueRowSet)26 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)25 KettleException (org.pentaho.di.core.exception.KettleException)23 BlockingRowSet (org.pentaho.di.core.BlockingRowSet)21 KettleStepException (org.pentaho.di.core.exception.KettleStepException)19 ArrayList (java.util.ArrayList)16 StepInterface (org.pentaho.di.trans.step.StepInterface)13 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)12 StepMeta (org.pentaho.di.trans.step.StepMeta)11 SingleRowRowSet (org.pentaho.di.core.SingleRowRowSet)10 ValueMetaInteger (org.pentaho.di.core.row.value.ValueMetaInteger)9 RowAdapter (org.pentaho.di.trans.step.RowAdapter)9 Matchers.anyString (org.mockito.Matchers.anyString)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)7 IOException (java.io.IOException)6 ValueMetaNumber (org.pentaho.di.core.row.value.ValueMetaNumber)6