Search in sources :

Example 46 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class AggregateRowsTest method createSourceRowSet.

/**
 * Builds a mocked input {@link RowSet} for the given source field name.
 *
 * <p>The row set is obtained from {@code stepMockHelper} and its row meta is replaced by a mock that
 * reports {@code source} as its only field name, resolves any field name to index 0 and hands out a
 * value meta that never reports a value as null.
 *
 * @param source the field name exposed by the mocked row meta
 * @return the mocked row set wired to the mocked row meta
 * @throws KettleValueException declared because the stubbed {@code isNull} call declares it
 */
private RowSet createSourceRowSet(String source) throws KettleValueException {
    // Value meta stub: no value is ever considered null.
    ValueMetaInterface neverNullValueMeta = mock(ValueMetaInterface.class);
    when(neverNullValueMeta.isNull(any())).thenReturn(false);

    // Row meta stub: a single field, always found at index 0.
    RowMetaInterface stubbedRowMeta = mock(RowMetaInterface.class);
    when(stubbedRowMeta.getValueMeta(anyInt())).thenReturn(neverNullValueMeta);
    when(stubbedRowMeta.indexOfValue(anyString())).thenReturn(0);
    when(stubbedRowMeta.getFieldNames()).thenReturn(new String[] { source });

    RowSet mockedRowSet = stepMockHelper.getMockInputRowSet(new String[] { source });
    when(mockedRowSet.getRowMeta()).thenReturn(stubbedRowMeta);
    return mockedRowSet;
}
Also used : RowSet(org.pentaho.di.core.RowSet) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Example 47 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class SortedMerge method getRowSorted.

/**
 * Returns the next row of the merged output, taking the smallest buffered row according to the
 * configured sort fields.
 *
 * <p>On the first call one row is read from every input row set and buffered; the buffer is then sorted
 * once. On every call the head of the buffer is removed and returned, and a replacement row is read
 * from the row set the returned row came from and inserted at its sorted position. If we don't have a
 * row on a row set, we wait for one.
 *
 * TODO: keep the inputRowSets() list sorted and go from there. That should dramatically improve speed as you only
 * need half as many comparisons.
 *
 * @return the next row, or {@code null} when the buffer is empty (no rows left on any input)
 * @throws KettleException if a configured sort field cannot be found in the row metadata, or if reading
 *         a row fails
 */
private synchronized Object[] getRowSorted() throws KettleException {
    if (first) {
        first = false;
        // Verify that socket connections to all the remote input steps are opened
        // before we start to read/write ...
        //
        openRemoteInputStepSocketsOnce();
        // Read one row from all rowsets...
        //
        data.sortedBuffer = new ArrayList<RowSetRow>();
        data.rowMeta = null;
        // PDI-1212:
        // If one of the inputRowSets holds a null row (the input yields
        // 0 rows), then the null rowSet is removed from the InputRowSet buffer.. (BaseStep.getRowFrom())
        // which throws this loop off by one (the next set never gets processed).
        // Instead of modifying BaseStep, I figure reversing the loop here would
        // effect change in less areas. If the reverse loop causes a problem, please
        // re-open http://jira.pentaho.com/browse/PDI-1212.
        List<RowSet> inputRowSets = getInputRowSets();
        for (int i = inputRowSets.size() - 1; i >= 0 && !isStopped(); i--) {
            RowSet rowSet = inputRowSets.get(i);
            Object[] row = getRowFrom(rowSet);
            if (row == null) {
                // This row set delivered nothing; move on to the next one.
                continue;
            }
            // Add this row to the sortedBuffer...
            // Which is not yet sorted, we'll get to that after the loop.
            //
            data.sortedBuffer.add(new RowSetRow(rowSet, rowSet.getRowMeta(), row));
            if (data.rowMeta == null) {
                data.rowMeta = rowSet.getRowMeta().clone();
            }
            // Resolve the sort field positions once, from the first row metadata we see.
            //
            if (data.fieldIndices == null) {
                // Get the indexes of the specified sort fields...
                data.fieldIndices = new int[meta.getFieldName().length];
                for (int f = 0; f < data.fieldIndices.length; f++) {
                    data.fieldIndices[f] = data.rowMeta.indexOfValue(meta.getFieldName()[f]);
                    if (data.fieldIndices[f] < 0) {
                        throw new KettleStepException("Unable to find fieldname [" + meta.getFieldName()[f] + "] in row : " + data.rowMeta);
                    }
                    data.rowMeta.getValueMeta(data.fieldIndices[f]).setSortedDescending(!meta.getAscending()[f]);
                }
            }
        }
        // The comparator does not depend on the loop variable, so it is created once here instead of
        // once per input row set.
        data.comparator = new Comparator<RowSetRow>() {

            @Override
            public int compare(RowSetRow o1, RowSetRow o2) {
                try {
                    return o1.getRowMeta().compare(o1.getRowData(), o2.getRowData(), data.fieldIndices);
                } catch (KettleValueException e) {
                    // Deliberately treated as "equal": Comparator.compare cannot throw a checked exception.
                    // TODO see if we should fire off alarms over here... Perhaps throw a RuntimeException.
                    return 0;
                }
            }
        };
        // Now sort the sortedBuffer for the first time. A single sort after all rows are buffered
        // yields the same ordering as re-sorting after every added row, without the repeated work.
        //
        Collections.sort(data.sortedBuffer, data.comparator);
    }
    //
    if (data.sortedBuffer.isEmpty()) {
        return null;
    }
    // now that we have all rows sorted, all we need to do is find out what the smallest row is.
    // The smallest row is the first in our case...
    //
    RowSetRow smallestRow = data.sortedBuffer.remove(0);
    Object[] outputRowData = smallestRow.getRowData();
    // We read another row from the row set where the smallest row came from.
    // That way we exhaust all row sets.
    //
    Object[] extraRow = getRowFrom(smallestRow.getRowSet());
    //
    if (extraRow != null) {
        // Add this one to the sortedBuffer
        //
        RowSetRow add = new RowSetRow(smallestRow.getRowSet(), smallestRow.getRowSet().getRowMeta(), extraRow);
        int index = Collections.binarySearch(data.sortedBuffer, add, data.comparator);
        if (index < 0) {
            // Not found: binarySearch returns (-(insertion point) - 1).
            data.sortedBuffer.add(-index - 1, add);
        } else {
            // An equal row exists at this position: insert right there.
            data.sortedBuffer.add(index, add);
        }
    }
    //
    if (getTrans().isSafeModeEnabled()) {
        // for checking we need to get data and meta
        //
        safeModeChecking(smallestRow.getRowMeta());
    }
    return outputRowData;
}
Also used : KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowSet(org.pentaho.di.core.RowSet) KettleValueException(org.pentaho.di.core.exception.KettleValueException)

Example 48 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class TransExecutor method collectExecutionResults.

/**
 * Assembles one row of execution statistics from {@code result} and sends it to the execution-result
 * row set.
 *
 * <p>Nothing happens unless an execution-result target step is configured in the meta and the
 * execution-result row set is available. Each statistic is appended to the row only when the
 * corresponding field name in the meta is non-empty, in the fixed order used below.
 *
 * @param result the result whose counters, status and outcome are copied into the output row
 * @throws KettleException declared for the row-passing call
 */
@VisibleForTesting
void collectExecutionResults(Result result) throws KettleException {
    RowSet resultsTarget = getData().getExecutionResultRowSet();
    if (meta.getExecutionResultTargetStepMeta() == null || resultsTarget == null) {
        // No target step or no row set to write to: nothing to collect.
        return;
    }

    Object[] statsRow = RowDataUtil.allocateRowData(getData().getExecutionResultsOutputRowMeta().size());
    int pos = 0;

    // Elapsed time since the current group started.
    if (!Utils.isEmpty(meta.getExecutionTimeField())) {
        statsRow[pos++] = Long.valueOf(System.currentTimeMillis() - getData().groupTimeStart);
    }
    // Overall outcome and error count.
    if (!Utils.isEmpty(meta.getExecutionResultField())) {
        statsRow[pos++] = Boolean.valueOf(result.getResult());
    }
    if (!Utils.isEmpty(meta.getExecutionNrErrorsField())) {
        statsRow[pos++] = Long.valueOf(result.getNrErrors());
    }
    // Line counters.
    if (!Utils.isEmpty(meta.getExecutionLinesReadField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesRead());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesWrittenField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesWritten());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesInputField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesInput());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesOutputField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesOutput());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesRejectedField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesRejected());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesUpdatedField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesUpdated());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesDeletedField())) {
        statsRow[pos++] = Long.valueOf(result.getNrLinesDeleted());
    }
    // File counter and exit status; the explicit Long.valueOf keeps these values boxed as Long.
    if (!Utils.isEmpty(meta.getExecutionFilesRetrievedField())) {
        statsRow[pos++] = Long.valueOf(result.getNrFilesRetrieved());
    }
    if (!Utils.isEmpty(meta.getExecutionExitStatusField())) {
        statsRow[pos++] = Long.valueOf(result.getExitStatus());
    }
    // Log text of the executed transformation, looked up by its log channel id.
    if (!Utils.isEmpty(meta.getExecutionLogTextField())) {
        String logChannel = getData().getExecutorTrans().getLogChannelId();
        statsRow[pos++] = KettleLogStore.getAppender().getBuffer(logChannel, false).toString();
    }
    if (!Utils.isEmpty(meta.getExecutionLogChannelIdField())) {
        statsRow[pos++] = getData().getExecutorTrans().getLogChannelId();
    }

    putRowTo(getData().getExecutionResultsOutputRowMeta(), statsRow, resultsTarget);
}
Also used : RowSet(org.pentaho.di.core.RowSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 49 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class TransExecutor method processRow.

/**
 * Handles one incoming row: forwards it to the executor-step output (when configured), decides whether
 * the current group is complete, and buffers the row for the next transformation execution.
 *
 * <p>A group is closed, and the transformation executed, when the group field value changes, when the
 * group time has elapsed, or when the buffer reaches the group size. Grouping by field and by time is
 * only considered when the group size is negative. When no more rows arrive, the transformation is
 * executed one last time and the step output is marked done.
 *
 * @param smi step meta, cast to {@code TransExecutorMeta}
 * @param sdi step data, cast to {@code TransExecutorData}
 * @return {@code true} if a row was processed, {@code false} once the input is exhausted
 * @throws KettleException wrapping any exception raised while processing the row
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    try {
        meta = (TransExecutorMeta) smi;
        setData((TransExecutorData) sdi);
        TransExecutorData execData = getData();

        // Wait for a row...
        Object[] inputRow = getRow();
        if (inputRow == null) {
            // Input exhausted: run once more for whatever is still buffered, then finish.
            executeTransformation();
            setOutputDone();
            return false;
        }

        if (first) {
            first = false;
            initOnFirstProcessingIteration();
        }

        // Pass the incoming row on to the executor-step output, if one is set up.
        RowSet passThroughRowSet = execData.getExecutorStepOutputRowSet();
        if (execData.getExecutorStepOutputRowMeta() != null && passThroughRowSet != null) {
            putRowTo(execData.getExecutorStepOutputRowMeta(), inputRow, passThroughRowSet);
        }

        // Grouping by field and execution time works ONLY if grouping by size is disabled.
        if (execData.groupSize < 0) {
            if (execData.groupFieldIndex >= 0) {
                // grouping by field: execute when the group value differs from the previous row's value
                Object currentGroupValue = inputRow[execData.groupFieldIndex];
                if (execData.prevGroupFieldData != null
                    && execData.groupFieldMeta.compare(execData.prevGroupFieldData, currentGroupValue) != 0) {
                    executeTransformation();
                }
                execData.prevGroupFieldData = currentGroupValue;
            } else if (execData.groupTime > 0) {
                // grouping by execution time: execute when the group has been open long enough
                long currentTime = System.currentTimeMillis();
                if (currentTime - execData.groupTimeStart >= execData.groupTime) {
                    executeTransformation();
                }
            }
        }

        // Add next value AFTER transformation execution, in case we are grouping by field (see PDI-14958),
        // and BEFORE checking size of a group, in case we are grouping by size (see PDI-14121).
        // should we clone for safety?
        execData.groupBuffer.add(new RowMetaAndData(getInputRowMeta(), inputRow));

        // If group buffer size exceeds specified limit, then execute transformation and flush group buffer.
        if (execData.groupSize > 0 && execData.groupBuffer.size() >= execData.groupSize) {
            executeTransformation();
        }
        return true;
    } catch (Exception e) {
        throw new KettleException(BaseMessages.getString(PKG, "TransExecutor.UnexpectedError"), e);
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RowMetaAndData(org.pentaho.di.core.RowMetaAndData) RowSet(org.pentaho.di.core.RowSet) KettleException(org.pentaho.di.core.exception.KettleException)

Example 50 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class TransformClassBase method findInfoRowSet.

/**
 * Looks up the input row set of the info step registered under the given tag.
 *
 * @param tag the info tag to resolve; {@code null} yields {@code null}
 * @return the input row set of the step mapped to {@code tag}, or {@code null} if {@code tag} is null
 * @throws KettleException if no step name is mapped to the tag, or no input row set exists for that step
 */
public RowSet findInfoRowSet(String tag) throws KettleException {
    if (tag != null) {
        // Resolve tag -> step name via the info map.
        String infoStepName = data.infoMap.get(tag);
        if (Utils.isEmpty(infoStepName)) {
            throw new KettleException(BaseMessages.getString(PKG, "TransformClassBase.Exception.UnableToFindInfoStepNameForTag", tag));
        }

        // Resolve step name -> input row set.
        RowSet infoRowSet = findInputRowSet(infoStepName);
        if (infoRowSet == null) {
            throw new KettleException(BaseMessages.getString(PKG, "TransformClassBase.Exception.UnableToFindInfoRowSetForStep", infoStepName));
        }
        return infoRowSet;
    }
    return null;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet)

Aggregations

RowSet (org.pentaho.di.core.RowSet)109 Test (org.junit.Test)43 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)40 RowMeta (org.pentaho.di.core.row.RowMeta)34 QueueRowSet (org.pentaho.di.core.QueueRowSet)26 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)25 KettleException (org.pentaho.di.core.exception.KettleException)23 BlockingRowSet (org.pentaho.di.core.BlockingRowSet)21 KettleStepException (org.pentaho.di.core.exception.KettleStepException)19 ArrayList (java.util.ArrayList)16 StepInterface (org.pentaho.di.trans.step.StepInterface)13 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)12 StepMeta (org.pentaho.di.trans.step.StepMeta)11 SingleRowRowSet (org.pentaho.di.core.SingleRowRowSet)10 ValueMetaInteger (org.pentaho.di.core.row.value.ValueMetaInteger)9 RowAdapter (org.pentaho.di.trans.step.RowAdapter)9 Matchers.anyString (org.mockito.Matchers.anyString)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)7 IOException (java.io.IOException)6 ValueMetaNumber (org.pentaho.di.core.row.value.ValueMetaNumber)6