Search in sources :

Example 11 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class JoinRows method getRowData.

/**
 * Get a row of data from the indicated rowset or buffer (memory/disk).
 * <p>
 * For {@code filenr == 0} the row is read straight from the first input row set. For any other
 * index the row comes either from the in-memory cache ({@code data.cache[filenr]}) or, when no
 * cache is present, from the temporary file {@code data.file[filenr]}. Once the last buffered row
 * has been handed out the read position wraps around to 0 and {@code data.restart[filenr]} is set.
 *
 * @param filenr
 *          The rowset or buffer to read a row from
 * @return a row of data, or {@code null} when no row is available or when an I/O problem occurred
 *         (in the latter case the error is logged and the transformation is asked to stop)
 * @throws KettleException
 *           in case something goes wrong
 */
public Object[] getRowData(int filenr) throws KettleException {
    data.restart[filenr] = false;
    Object[] rowData = null;
    // Do we read from the first rowset or a file?
    if (filenr == 0) {
        // Rowset 0:
        RowSet rowSet = getFirstInputRowSet();
        rowData = getRowFrom(rowSet);
        if (rowData != null) {
            // Only refresh the metadata when a row was actually delivered.
            data.fileRowMeta[0] = rowSet.getRowMeta();
        }
        if (log.isRowLevel()) {
            logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromStream") + (rowData == null ? "<null>" : data.fileRowMeta[0].getString(rowData)));
        }
    } else {
        if (data.cache[filenr] == null) {
            // No in-memory cache for this input: read from the temporary file.
            // See if we need to open the file?
            if (data.dataInputStream[filenr] == null) {
                try {
                    data.fileInputStream[filenr] = new FileInputStream(data.file[filenr]);
                    data.dataInputStream[filenr] = new DataInputStream(data.fileInputStream[filenr]);
                } catch (FileNotFoundException fnfe) {
                    logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToFindOrOpenTemporaryFile") + data.file[filenr] + "] : " + fnfe.toString());
                    setErrors(1);
                    stopAll();
                    return null;
                }
            }
            if (data.size[filenr] == 0) {
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
                }
                return null;
            }
            try {
                rowData = data.fileRowMeta[filenr].readData(data.dataInputStream[filenr]);
            } catch (KettleFileException e) {
                logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "]");
                setErrors(1);
                stopAll();
                return null;
            } catch (SocketTimeoutException e) {
                logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "]");
                setErrors(1);
                stopAll();
                return null;
            }
            if (log.isRowLevel()) {
                logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromFile") + filenr + " : " + data.fileRowMeta[filenr].getString(rowData));
            }
            data.position[filenr]++;
            // All rows of the file were read: close it and rewind.
            // The file will then be re-opened if needed later on.
            if (data.position[filenr] >= data.size[filenr]) {
                try {
                    data.dataInputStream[filenr].close();
                    data.fileInputStream[filenr].close();
                    data.dataInputStream[filenr] = null;
                    data.fileInputStream[filenr] = null;
                    data.position[filenr] = 0;
                    // indicate that we restarted.
                    data.restart[filenr] = true;
                } catch (IOException ioe) {
                    logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToCloseInputStream") + data.file[filenr] + "] : " + ioe.toString());
                    setErrors(1);
                    stopAll();
                    return null;
                }
            }
        } else {
            if (data.size[filenr] == 0) {
                if (log.isBasic()) {
                    logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
                }
                return null;
            }
            // Index the read position with the requested buffer (filenr), not with the shared
            // data.filenr cursor, so that the position that is read is the same one that is
            // advanced and wrapped below.
            rowData = data.cache[filenr].get(data.position[filenr]);
            // Don't forget to clone the data to protect it against data alteration downstream.
            //
            rowData = data.fileRowMeta[filenr].cloneRow(rowData);
            data.position[filenr]++;
            // End of the cache reached: wrap around to the first cached row.
            if (data.position[filenr] >= data.size[filenr]) {
                data.position[filenr] = 0;
                // indicate that we restarted.
                data.restart[filenr] = true;
            }
        }
    }
    return rowData;
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) SocketTimeoutException(java.net.SocketTimeoutException) RowSet(org.pentaho.di.core.RowSet) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream)

Example 12 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class JoinRows method initialize.

/**
 * Allocates the per-input bookkeeping arrays and creates one temporary file for every input row
 * set except the first one (index 0 is skipped).
 *
 * @throws KettleException
 *           wrapping any exception raised while allocating the structures or creating the
 *           temporary files
 */
@SuppressWarnings("unchecked")
public void initialize() throws KettleException {
    // getRow() has not been called yet, so make sure all input row sets are available first.
    openRemoteInputStepSocketsOnce();
    try {
        // Cache rows in memory first; writing to file comes later.
        data.caching = true;
        // Index 0 is skipped, work starts at 1.
        data.filenr = 1;
        // If a main step is configured, its row set is moved to position 0.
        swapFirstInputRowSetIfExists(meta.getMainStepname());
        final List<RowSet> rowSets = getInputRowSets();
        final int count = rowSets.size();
        // ** INPUT SIDE **
        data.file = new File[count];
        data.fileInputStream = new FileInputStream[count];
        data.dataInputStream = new DataInputStream[count];
        data.size = new int[count];
        data.fileRowMeta = new RowMetaInterface[count];
        data.joinrow = new Object[count][];
        data.rs = new RowSet[count];
        data.cache = new List[count];
        data.position = new int[count];
        data.fileOutputStream = new FileOutputStream[count];
        data.dataOutputStream = new DataOutputStream[count];
        data.restart = new boolean[count];
        for (int nr = 1; nr < count; nr++) {
            final String tempDirName = environmentSubstitute(meta.getDirectory());
            // A null directory is passed through to createTempFile as-is.
            final File tempDir = (tempDirName == null) ? null : new File(tempDirName);
            data.file[nr] = File.createTempFile(meta.getPrefix(), ".tmp", tempDir);
            data.size[nr] = 0;
            data.rs[nr] = rowSets.get(nr);
            data.cache[nr] = null;
            data.position[nr] = 0;
            data.dataInputStream[nr] = null;
            data.dataOutputStream[nr] = null;
            data.joinrow[nr] = null;
            data.restart[nr] = false;
        }
    } catch (Exception e) {
        throw new KettleException(BaseMessages.getString(PKG, "JoinRows.Log.ErrorCreatingTemporaryFiles"), e);
    }
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) RowSet(org.pentaho.di.core.RowSet) File(java.io.File) KettleException(org.pentaho.di.core.exception.KettleException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) SocketTimeoutException(java.net.SocketTimeoutException) KettleFileException(org.pentaho.di.core.exception.KettleFileException)

Example 13 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class JoinRows method batchComplete.

/**
 * Finishes a batch: keeps calling {@code processRow} while the first input row set still holds
 * rows and the step is not stopped, then calls it a further number of times derived from the
 * sizes of the non-null caches.
 *
 * @throws KettleException
 *           propagated from {@code processRow}
 */
@Override
public void batchComplete() throws KettleException {
    final RowSet firstRowSet = getFirstInputRowSet();
    // Multiply the sizes of all non-null caches; a running value of 0 is bumped back to 1 at the
    // start of every iteration.
    int repeats = 0;
    for (int idx = 0; idx < data.cache.length; idx++) {
        repeats = (repeats == 0) ? 1 : repeats;
        if (data.cache[idx] == null) {
            continue;
        }
        repeats *= data.cache[idx].size();
    }
    // Drain what is still waiting in the first row set.
    while (true) {
        if (firstRowSet.size() <= 0 || isStopped()) {
            break;
        }
        processRow(meta, data);
    }
    // Then run the computed number of additional passes.
    for (int remaining = repeats; remaining > 0; remaining--) {
        processRow(meta, data);
    }
}
Also used : RowSet(org.pentaho.di.core.RowSet)

Example 14 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class MultiMergeJoin method processFirstRow.

/**
 * One-time setup performed before the join starts: determines which configured input steps are
 * connected through an enabled hop, allocates the per-stream structures in {@code data}, reads the
 * first row of every such stream, resolves the key field indexes and builds the output row
 * metadata.
 *
 * @param smi
 *          step metadata, cast to {@link MultiMergeJoinMeta}
 * @param sdi
 *          step data, cast to {@link MultiMergeJoinData}
 * @return {@code false} when no input step with an enabled hop is left, {@code true} otherwise
 * @throws KettleException
 *           when a reference stream, hop, input row set or key field cannot be found
 */
private boolean processFirstRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (MultiMergeJoinMeta) smi;
    data = (MultiMergeJoinData) sdi;
    TransMeta transMeta = getTransMeta();
    TransHopMeta transHopMeta;
    StepIOMetaInterface stepIOMeta = meta.getStepIOMeta();
    List<StreamInterface> infoStreams = stepIOMeta.getInfoStreams();
    StreamInterface stream;
    StepMeta toStepMeta = meta.getParentStepMeta();
    StepMeta fromStepMeta;
    // Names of the input steps whose hop to this step is enabled.
    ArrayList<String> inputStepNameList = new ArrayList<String>();
    String[] inputStepNames = meta.getInputSteps();
    String inputStepName;
    // Pass 1: validate every info stream and keep only the steps with an enabled hop.
    // NOTE(review): inputStepNames is indexed with the info stream index — assumes both have
    // the same length and order; confirm.
    for (int i = 0; i < infoStreams.size(); i++) {
        inputStepName = inputStepNames[i];
        stream = infoStreams.get(i);
        fromStepMeta = stream.getStepMeta();
        if (fromStepMeta == null) {
            // should not arrive here, should typically have been caught by init.
            throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", inputStepName));
        }
        // check the hop
        transHopMeta = transMeta.findTransHop(fromStepMeta, toStepMeta, true);
        // there is no hop: this is unexpected.
        if (transHopMeta == null) {
            // should not arrive here, should typically have been caught by init.
            throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", inputStepName));
        } else if (transHopMeta.isEnabled()) {
            inputStepNameList.add(inputStepName);
        } else {
            logDetailed(BaseMessages.getString(PKG, "MultiMergeJoin.Log.IgnoringStep", inputStepName));
        }
    }
    int streamSize = inputStepNameList.size();
    if (streamSize == 0) {
        // Nothing to join.
        return false;
    }
    String keyField;
    String[] keyFields;
    // Allocate one slot per enabled input stream in every per-stream structure.
    data.rowSets = new RowSet[streamSize];
    RowSet rowSet;
    Object[] row;
    data.rows = new Object[streamSize][];
    data.metas = new RowMetaInterface[streamSize];
    data.rowLengths = new int[streamSize];
    MultiMergeJoinData.QueueComparator comparator = new MultiMergeJoinData.QueueComparator(data);
    data.queue = new PriorityQueue<MultiMergeJoinData.QueueEntry>(streamSize, comparator);
    data.results = new ArrayList<List<Object[]>>(streamSize);
    MultiMergeJoinData.QueueEntry queueEntry;
    data.queueEntries = new MultiMergeJoinData.QueueEntry[streamSize];
    data.drainIndices = new int[streamSize];
    data.keyNrs = new int[streamSize][];
    data.dummy = new Object[streamSize][];
    RowMetaInterface rowMeta;
    data.outputRowMeta = new RowMeta();
    // Pass 2: i walks all configured input steps, j counts only the enabled ones and is the
    // index used in the data.* arrays.
    for (int i = 0, j = 0; i < inputStepNames.length; i++) {
        inputStepName = inputStepNames[i];
        if (!inputStepNameList.contains(inputStepName)) {
            // ignore step with disabled hop.
            continue;
        }
        queueEntry = new MultiMergeJoinData.QueueEntry();
        queueEntry.index = j;
        data.queueEntries[j] = queueEntry;
        data.results.add(new ArrayList<Object[]>());
        rowSet = findInputRowSet(inputStepName);
        if (rowSet == null) {
            throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Exception.UnableToFindSpecifiedStep", inputStepName));
        }
        data.rowSets[j] = rowSet;
        // Read the first row of this stream.
        row = getRowFrom(rowSet);
        data.rows[j] = row;
        if (row == null) {
            // No row was delivered: take the row layout from the transformation metadata instead.
            // NOTE(review): data.keyNrs[j] stays null and no queue entry is added in this branch.
            rowMeta = getTransMeta().getStepFields(inputStepName);
            data.metas[j] = rowMeta;
        } else {
            queueEntry.row = row;
            rowMeta = rowSet.getRowMeta();
            // Key fields are configured per input step (index i) as a comma separated list.
            // NOTE(review): the parts are not trimmed before the lookup — confirm that the
            // configured value never contains whitespace around the commas.
            keyField = meta.getKeyFields()[i];
            String[] keyFieldParts = keyField.split(",");
            String keyFieldPart;
            data.keyNrs[j] = new int[keyFieldParts.length];
            for (int k = 0; k < keyFieldParts.length; k++) {
                keyFieldPart = keyFieldParts[k];
                data.keyNrs[j][k] = rowMeta.indexOfValue(keyFieldPart);
                if (data.keyNrs[j][k] < 0) {
                    // A negative index is treated as "field not found".
                    String message = BaseMessages.getString(PKG, "MultiMergeJoin.Exception.UnableToFindFieldInReferenceStream", keyFieldPart, inputStepName);
                    logError(message);
                    throw new KettleStepException(message);
                }
            }
            data.metas[j] = rowMeta;
            data.queue.add(data.queueEntries[j]);
        }
        // The output layout is the concatenation of the layouts of all enabled streams.
        data.outputRowMeta.mergeRowMeta(rowMeta.clone());
        data.rowLengths[j] = rowMeta.size();
        // Placeholder row with the width of this stream.
        data.dummy[j] = RowDataUtil.allocateRowData(rowMeta.size());
        j++;
    }
    return true;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowMeta(org.pentaho.di.core.row.RowMeta) TransMeta(org.pentaho.di.trans.TransMeta) ArrayList(java.util.ArrayList) RowSet(org.pentaho.di.core.RowSet) StepIOMetaInterface(org.pentaho.di.trans.step.StepIOMetaInterface) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) ArrayList(java.util.ArrayList) List(java.util.List) StreamInterface(org.pentaho.di.trans.step.errorhandling.StreamInterface) StepMeta(org.pentaho.di.trans.step.StepMeta) TransHopMeta(org.pentaho.di.trans.TransHopMeta)

Example 15 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method handleGetRow.

/**
 * Reads the next row from the input row sets of this step, switching between row sets as needed,
 * and notifies the row listeners about the row that was read.
 *
 * @return the row that was read, or {@code null} when the step is stopped or no input row set is
 *         left
 * @throws KettleException
 *           when the wait for a paused step is interrupted, or when a called helper fails
 */
private Object[] handleGetRow() throws KettleException {
    // While the step is paused (and not stopped), poll every 100 ms.
    while (paused.get() && !stopped.get()) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt flag is not restored before rethrowing.
            throw new KettleStepException(e);
        }
    }
    if (stopped.get()) {
        if (log.isDebug()) {
            logDebug(BaseMessages.getString(PKG, "BaseStep.Log.StopLookingForMoreRows"));
        }
        stopAll();
        return null;
    }
    // Small startup check
    //
    waitUntilTransformationIsStarted();
    // See if we need to open sockets to remote input steps...
    //
    openRemoteInputStepSocketsOnce();
    RowSet inputRowSet = null;
    Object[] row = null;
    synchronized (inputRowSetsLock) {
        // No input row sets at all: nothing to read.
        if (inputRowSets.isEmpty()) {
            return null;
        }
        // Do we need to switch to the next input stream?
        if (blockPointer >= NR_OF_ROWS_IN_BLOCK) {
            // Visit each input row set at most once, stopping at the first one that has a row
            // available immediately.
            for (int r = 0; r < inputRowSets.size() && row == null; r++) {
                nextInputStream();
                inputRowSet = currentInputStream();
                row = inputRowSet.getRowImmediate();
            }
            if (row != null) {
                incrementLinesRead();
            }
        } else {
            // What's the current input stream?
            inputRowSet = currentInputStream();
        }
        // With thread priority management enabled, pause for the shortest possible time when the
        // current row set is not done and holds no more than lowerBufferBoundary rows
        // (presumably to give the producing step a chance to fill the buffer — TODO confirm).
        if (isUsingThreadPriorityManagment() && !inputRowSet.isDone() && inputRowSet.size() <= lowerBufferBoundary && !isStopped()) {
            try {
                Thread.sleep(0, 1);
            } catch (InterruptedException e) {
            // Ignore sleep interruption exception
            // NOTE(review): the interrupt is swallowed here without restoring the flag.
            }
        }
        // Poll the row sets in turn until a row shows up or the step is stopped.
        while (row == null && !isStopped()) {
            // Get a row from the input in row set ...
            // Timeout immediately if nothing is there to read.
            // We will then switch to the next row set to read from...
            //
            row = inputRowSet.getRowWait(1, TimeUnit.MILLISECONDS);
            if (row != null) {
                incrementLinesRead();
                blockPointer++;
            } else {
                // Nothing arrived within the timeout. If the row set reports it is done, try one
                // more read before removing it from the list of inputs.
                if (inputRowSet.isDone()) {
                    row = inputRowSet.getRowWait(1, TimeUnit.MILLISECONDS);
                    if (row == null) {
                        inputRowSets.remove(currentInputRowSetNr);
                        if (inputRowSets.isEmpty()) {
                            // We're completely done.
                            return null;
                        }
                    } else {
                        incrementLinesRead();
                    }
                }
                // NOTE(review): this also runs when the retry above produced a row, so
                // inputRowSet may then no longer be the row set the row came from when its row
                // metadata is read further down — confirm this is intended.
                nextInputStream();
                inputRowSet = currentInputStream();
            }
        }
        // Fallback: read from the following row sets via getRowFrom().
        // NOTE(review): presumably isStopped() mirrors stopped.get(), in which case this loop
        // cannot be entered after the loop above — TODO confirm.
        while (row == null && !stopped.get()) {
            // No row sets left to read from.
            if (inputRowSets.isEmpty()) {
                // We're done.
                return null;
            }
            nextInputStream();
            inputRowSet = currentInputStream();
            row = getRowFrom(inputRowSet);
        }
    }
    // Take the row metadata from the current row set when it is not known yet,
    // or if prevSteps.length > 1 inputRowMeta can be changed
    if (inputRowMeta == null || prevSteps.length > 1) {
        inputRowMeta = inputRowSet.getRowMeta();
    }
    if (row != null) {
        // In safe mode, run the static row mixing check before the listeners are notified.
        if (trans.isSafeModeEnabled()) {
            transMeta.checkRowMixingStatically(stepMeta, null);
        }
        for (RowListener listener : rowListeners) {
            listener.rowReadEvent(inputRowMeta, row);
        }
    }
    // Check the rejection rates etc. as well.
    verifyRejectionRates();
    return row;
}
Also used : KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet)

Aggregations

RowSet (org.pentaho.di.core.RowSet)109 Test (org.junit.Test)43 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)40 RowMeta (org.pentaho.di.core.row.RowMeta)34 QueueRowSet (org.pentaho.di.core.QueueRowSet)26 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)25 KettleException (org.pentaho.di.core.exception.KettleException)23 BlockingRowSet (org.pentaho.di.core.BlockingRowSet)21 KettleStepException (org.pentaho.di.core.exception.KettleStepException)19 ArrayList (java.util.ArrayList)16 StepInterface (org.pentaho.di.trans.step.StepInterface)13 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)12 StepMeta (org.pentaho.di.trans.step.StepMeta)11 SingleRowRowSet (org.pentaho.di.core.SingleRowRowSet)10 ValueMetaInteger (org.pentaho.di.core.row.value.ValueMetaInteger)9 RowAdapter (org.pentaho.di.trans.step.RowAdapter)9 Matchers.anyString (org.mockito.Matchers.anyString)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)7 IOException (java.io.IOException)6 ValueMetaNumber (org.pentaho.di.core.row.value.ValueMetaNumber)6