Use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
The class AggregateRowsTest, method createSourceRowSet.
private RowSet createSourceRowSet(String source) throws KettleValueException {
  ValueMetaInterface interface1 = mock(ValueMetaInterface.class);
  when(interface1.isNull(any())).thenReturn(false);
  RowMetaInterface sourceRowMeta = mock(RowMetaInterface.class);
  when(sourceRowMeta.getFieldNames()).thenReturn(new String[] { source });
  when(sourceRowMeta.indexOfValue(anyString())).thenReturn(0);
  when(sourceRowMeta.getValueMeta(anyInt())).thenReturn(interface1);
  RowSet sourceRowSet = stepMockHelper.getMockInputRowSet(new String[] { source });
  when(sourceRowSet.getRowMeta()).thenReturn(sourceRowMeta);
  return sourceRowSet;
}
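A hypothetical sketch of how such a helper is typically consumed in the surrounding Mockito-based test; step, meta, and data stand in for the AggregateRows instance and its meta/data objects built elsewhere in AggregateRowsTest, and the field name is illustrative:

// Hypothetical usage; step, meta, and data come from the rest of the test class.
RowSet input = createSourceRowSet("source_field");
step.addRowSetToInputRowSets(input);
// The first call should consume the mocked row without tripping a null check.
assertTrue(step.processRow(meta, data));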
Use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
The class SortedMerge, method getRowSorted.
/**
 * We read from all streams in the partition merge mode. For that we need at least one row on all input rowsets...
 * If we don't have a row, we wait for one.
 *
 * TODO: keep the inputRowSets() list sorted and go from there. That should dramatically improve speed as you only
 * need half as many comparisons.
 *
 * @return the next row
 */
private synchronized Object[] getRowSorted() throws KettleException {
  if (first) {
    first = false;

    // Verify that socket connections to all the remote input steps are opened
    // before we start to read/write ...
    //
    openRemoteInputStepSocketsOnce();

    // Read one row from all rowsets...
    //
    data.sortedBuffer = new ArrayList<RowSetRow>();
    data.rowMeta = null;

    // PDI-1212:
    // If one of the inputRowSets holds a null row (the input yields
    // 0 rows), then the null rowSet is removed from the InputRowSet buffer (BaseStep.getRowFrom()),
    // which throws this loop off by one (the next set never gets processed).
    // Instead of modifying BaseStep, I figure reversing the loop here would
    // effect change in fewer areas. If the reverse loop causes a problem, please
    // re-open http://jira.pentaho.com/browse/PDI-1212.
    List<RowSet> inputRowSets = getInputRowSets();
    for (int i = inputRowSets.size() - 1; i >= 0 && !isStopped(); i--) {
      RowSet rowSet = inputRowSets.get(i);
      Object[] row = getRowFrom(rowSet);
      if (row != null) {
        // Add this row to the sortedBuffer...
        // Which is not yet sorted, we'll get to that later.
        //
        data.sortedBuffer.add(new RowSetRow(rowSet, rowSet.getRowMeta(), row));
        if (data.rowMeta == null) {
          data.rowMeta = rowSet.getRowMeta().clone();
        }

        // What fields do we compare on and in what order?
        //
        if (data.fieldIndices == null) {
          // Get the indexes of the specified sort fields...
          data.fieldIndices = new int[meta.getFieldName().length];
          for (int f = 0; f < data.fieldIndices.length; f++) {
            data.fieldIndices[f] = data.rowMeta.indexOfValue(meta.getFieldName()[f]);
            if (data.fieldIndices[f] < 0) {
              throw new KettleStepException("Unable to find fieldname [" + meta.getFieldName()[f] + "] in row : " + data.rowMeta);
            }
            data.rowMeta.getValueMeta(data.fieldIndices[f]).setSortedDescending(!meta.getAscending()[f]);
          }
        }
      }

      data.comparator = new Comparator<RowSetRow>() {
        public int compare(RowSetRow o1, RowSetRow o2) {
          try {
            return o1.getRowMeta().compare(o1.getRowData(), o2.getRowData(), data.fieldIndices);
          } catch (KettleValueException e) {
            // TODO see if we should fire off alarms over here... Perhaps throw a RuntimeException.
            return 0;
          }
        }
      };

      // Now sort the sortedBuffer for the first time.
      //
      Collections.sort(data.sortedBuffer, data.comparator);
    }
  }

  // If the sorted buffer is empty, all row sets are exhausted and we're done.
  //
  if (data.sortedBuffer.isEmpty()) {
    return null;
  }

  // Now that we have all rows sorted, all we need to do is find out what the smallest row is.
  // The smallest row is the first in our case...
  //
  RowSetRow smallestRow = data.sortedBuffer.get(0);
  data.sortedBuffer.remove(0);
  Object[] outputRowData = smallestRow.getRowData();

  // We read another row from the row set where the smallest row came from.
  // That way we exhaust all row sets.
  //
  Object[] extraRow = getRowFrom(smallestRow.getRowSet());

  // If we got another row, insert it into the sorted buffer at the right position.
  //
  if (extraRow != null) {
    // Add this one to the sortedBuffer
    //
    RowSetRow add = new RowSetRow(smallestRow.getRowSet(), smallestRow.getRowSet().getRowMeta(), extraRow);
    int index = Collections.binarySearch(data.sortedBuffer, add, data.comparator);
    if (index < 0) {
      data.sortedBuffer.add(-index - 1, add);
    } else {
      data.sortedBuffer.add(index, add);
    }
  }

  // In safe mode we verify that the row layout stays identical across row sets.
  //
  if (getTrans().isSafeModeEnabled()) {
    // For checking we need to get data and meta
    //
    safeModeChecking(smallestRow.getRowMeta());
  }
  return outputRowData;
}
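The insertion of the extra row above leans on the java.util.Collections.binarySearch contract: when the key is absent, the return value is -(insertionPoint) - 1, so negating and subtracting one recovers the slot that keeps the list sorted. A minimal standalone illustration, independent of the Kettle types:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class SortedInsertDemo {
  public static void main(String[] args) {
    List<Integer> sorted = new ArrayList<>(List.of(1, 3, 5, 7));
    int key = 4;
    int index = Collections.binarySearch(sorted, key);
    // Absent key: binarySearch returns -(insertionPoint) - 1, so decode it.
    int insertAt = index < 0 ? -index - 1 : index;
    sorted.add(insertAt, key);
    System.out.println(sorted); // prints [1, 3, 4, 5, 7]
  }
}

When the key is already present, getRowSorted() inserts at the returned index directly, keeping equal rows adjacent; either way the buffer never needs a full re-sort after the initial Collections.sort.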
Use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
The class TransExecutor, method collectExecutionResults.
@VisibleForTesting
void collectExecutionResults(Result result) throws KettleException {
  RowSet executionResultsRowSet = getData().getExecutionResultRowSet();
  if (meta.getExecutionResultTargetStepMeta() != null && executionResultsRowSet != null) {
    Object[] outputRow = RowDataUtil.allocateRowData(getData().getExecutionResultsOutputRowMeta().size());
    int idx = 0;
    if (!Utils.isEmpty(meta.getExecutionTimeField())) {
      outputRow[idx++] = Long.valueOf(System.currentTimeMillis() - getData().groupTimeStart);
    }
    if (!Utils.isEmpty(meta.getExecutionResultField())) {
      outputRow[idx++] = Boolean.valueOf(result.getResult());
    }
    if (!Utils.isEmpty(meta.getExecutionNrErrorsField())) {
      outputRow[idx++] = Long.valueOf(result.getNrErrors());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesReadField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesRead());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesWrittenField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesWritten());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesInputField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesInput());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesOutputField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesOutput());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesRejectedField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesRejected());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesUpdatedField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesUpdated());
    }
    if (!Utils.isEmpty(meta.getExecutionLinesDeletedField())) {
      outputRow[idx++] = Long.valueOf(result.getNrLinesDeleted());
    }
    if (!Utils.isEmpty(meta.getExecutionFilesRetrievedField())) {
      outputRow[idx++] = Long.valueOf(result.getNrFilesRetrieved());
    }
    if (!Utils.isEmpty(meta.getExecutionExitStatusField())) {
      outputRow[idx++] = Long.valueOf(result.getExitStatus());
    }
    if (!Utils.isEmpty(meta.getExecutionLogTextField())) {
      String channelId = getData().getExecutorTrans().getLogChannelId();
      String logText = KettleLogStore.getAppender().getBuffer(channelId, false).toString();
      outputRow[idx++] = logText;
    }
    if (!Utils.isEmpty(meta.getExecutionLogChannelIdField())) {
      outputRow[idx++] = getData().getExecutorTrans().getLogChannelId();
    }
    putRowTo(getData().getExecutionResultsOutputRowMeta(), outputRow, executionResultsRowSet);
  }
}
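Every output field here is optional, so the method packs enabled values densely from index 0 using a running idx; the output row meta is assumed to be built with the same conditionals so positions line up. A small sketch of that pattern with plain Java types (the boolean toggles are hypothetical stand-ins for the Utils.isEmpty checks):

// Sketch of the conditional-append pattern, outside of Kettle:
Object[] outputRow = new Object[3]; // sized for the maximum field count
int idx = 0;
boolean wantTime = true, wantResult = false, wantErrors = true;
if (wantTime) {
  outputRow[idx++] = Long.valueOf(System.currentTimeMillis());
}
if (wantResult) {
  outputRow[idx++] = Boolean.TRUE; // disabled here, so no gap is left behind
}
if (wantErrors) {
  outputRow[idx++] = Long.valueOf(0L);
}
// Only the first idx slots are populated; trailing slots remain null.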
Use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
The class TransExecutor, method processRow.
/**
 * Process a single row. In our case, we send one row of data to a piece of transformation. In the transformation, we
 * look up the MappingInput step to send our rows to it. As a consequence, for the time being, there can only be one
 * MappingInput and one MappingOutput step in the TransExecutor.
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  try {
    meta = (TransExecutorMeta) smi;
    setData((TransExecutorData) sdi);
    TransExecutorData transExecutorData = getData();

    // Wait for a row...
    Object[] row = getRow();
    if (row == null) {
      executeTransformation();
      setOutputDone();
      return false;
    }
    if (first) {
      first = false;
      initOnFirstProcessingIteration();
    }
    RowSet executorStepOutputRowSet = transExecutorData.getExecutorStepOutputRowSet();
    if (transExecutorData.getExecutorStepOutputRowMeta() != null && executorStepOutputRowSet != null) {
      putRowTo(transExecutorData.getExecutorStepOutputRowMeta(), row, executorStepOutputRowSet);
    }

    // Grouping by field and execution time works ONLY if grouping by size is disabled.
    if (transExecutorData.groupSize < 0) {
      if (transExecutorData.groupFieldIndex >= 0) {
        // grouping by field
        Object groupFieldData = row[transExecutorData.groupFieldIndex];
        if (transExecutorData.prevGroupFieldData != null) {
          if (transExecutorData.groupFieldMeta.compare(transExecutorData.prevGroupFieldData, groupFieldData) != 0) {
            executeTransformation();
          }
        }
        transExecutorData.prevGroupFieldData = groupFieldData;
      } else if (transExecutorData.groupTime > 0) {
        // grouping by execution time
        long now = System.currentTimeMillis();
        if (now - transExecutorData.groupTimeStart >= transExecutorData.groupTime) {
          executeTransformation();
        }
      }
    }

    // Add the next value AFTER transformation execution, in case we are grouping by field (see PDI-14958),
    // and BEFORE checking the size of a group, in case we are grouping by size (see PDI-14121).
    // Should we clone for safety?
    transExecutorData.groupBuffer.add(new RowMetaAndData(getInputRowMeta(), row));

    // If the group buffer size exceeds the specified limit, execute the transformation and flush the group buffer.
    if (transExecutorData.groupSize > 0) {
      if (transExecutorData.groupBuffer.size() >= transExecutorData.groupSize) {
        executeTransformation();
      }
    }
    return true;
  } catch (Exception e) {
    throw new KettleException(BaseMessages.getString(PKG, "TransExecutor.UnexpectedError"), e);
  }
}
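The three grouping triggers are effectively mutually exclusive: grouping by size takes precedence when configured, otherwise a change in the group field or an elapsed time window flushes the buffer. A condensed, hypothetical restatement of that decision logic (plain equals() stands in for Kettle's ValueMetaInterface.compare()):

// Hypothetical condensation of the grouping rules in processRow():
static boolean groupComplete(int groupSize, int bufferSize, long groupTime,
    long groupTimeStart, Object prevFieldValue, Object fieldValue) {
  if (groupSize > 0) {
    // Grouping by size: flush once the buffer reaches the limit.
    return bufferSize >= groupSize;
  }
  if (prevFieldValue != null) {
    // Grouping by field: flush when the group field changes value.
    return !prevFieldValue.equals(fieldValue);
  }
  if (groupTime > 0) {
    // Grouping by time: flush when the configured interval has elapsed.
    return System.currentTimeMillis() - groupTimeStart >= groupTime;
  }
  return false;
}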
Use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
The class TransformClassBase, method findInfoRowSet.
public RowSet findInfoRowSet(String tag) throws KettleException {
  if (tag == null) {
    return null;
  }
  String stepname = data.infoMap.get(tag);
  if (Utils.isEmpty(stepname)) {
    throw new KettleException(BaseMessages.getString(PKG, "TransformClassBase.Exception.UnableToFindInfoStepNameForTag", tag));
  }
  RowSet rowSet = findInputRowSet(stepname);
  if (rowSet == null) {
    throw new KettleException(BaseMessages.getString(PKG, "TransformClassBase.Exception.UnableToFindInfoRowSetForStep", stepname));
  }
  return rowSet;
}
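Inside a User Defined Java Class step body, an info stream tagged in the step dialog could then be drained like this (the "lookup" tag and the caching logic are illustrative):

// Hypothetical use from a User Defined Java Class step body; the tag must
// match an info-step mapping configured in the step dialog:
RowSet infoStream = findInfoRowSet("lookup");
Object[] infoRow = getRowFrom(infoStream);
while (infoRow != null) {
  // Cache or index the info row here before processing the main stream.
  infoRow = getRowFrom(infoStream);
}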