Search in sources :

Example 6 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method specialPartitioning.

/**
 * Routes a single row to the output row set(s) selected by the partitioning rule of the next step.
 * <p>
 * The partition number is obtained from {@code nextStepPartitioningMeta}. In clustered mode (a non-empty
 * slave-step-copy partition distribution is present on the transformation metadata) the row goes to the one
 * row set mapped to that partition number. Otherwise the row is put on one row set per entry of the
 * {@code nextSteps} member, at index {@code partitionNr + i * partCount}.
 *
 * @param rowMeta the metadata describing {@code row}
 * @param row the row data to pass on
 * @throws KettleStepException if no partitioning information is available for the next step, the partition
 *         number cannot be calculated, no partition can be found for an output row set, the partition number
 *         exceeds the available row sets, or the row cannot be converted to a string for row-level logging
 */
private void specialPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
    if (nextStepPartitioningMeta == null) {
        // Look up the partitioning of the next step.
        // This is the case for non-clustered partitioning...
        //
        // The local is deliberately not called "nextSteps": that name is also used by the member that the
        // local-partitioning loop further down iterates over.
        List<StepMeta> followingSteps = transMeta.findNextSteps(stepMeta);
        if (!followingSteps.isEmpty()) {
            nextStepPartitioningMeta = followingSteps.get(0).getStepPartitioningMeta();
        }
    }
    if (nextStepPartitioningMeta == null) {
        // Fail with a descriptive error instead of a bare NullPointerException on the call below.
        throw new KettleStepException("Unable to partition the row: no partitioning information is available for the next step");
    }
    int partitionNr;
    try {
        partitionNr = nextStepPartitioningMeta.getPartition(rowMeta, row);
    } catch (KettleException e) {
        throw new KettleStepException("Unable to convert a value to integer while calculating the partition number", e);
    }
    RowSet selectedRowSet = null;
    if (clusteredPartitioningFirst) {
        clusteredPartitioningFirst = false;
        // We are only running remotely if the distribution is there AND it actually contains something.
        //
        clusteredPartitioning = transMeta.getSlaveStepCopyPartitionDistribution() != null && !transMeta.getSlaveStepCopyPartitionDistribution().getDistribution().isEmpty();
    }
    if (clusteredPartitioning) {
        // Build the partition number -> row set lookup once, on first use.
        //
        if (partitionNrRowSetList == null) {
            partitionNrRowSetList = new RowSet[outputRowSets.size()];
            // The distribution is calculated during transformation split
            // The slave-step-copy distribution is passed onto the slave transformation
            //
            SlaveStepCopyPartitionDistribution distribution = transMeta.getSlaveStepCopyPartitionDistribution();
            String nextPartitionSchemaName = TransSplitter.createPartitionSchemaNameFromTarget(nextStepPartitioningMeta.getPartitionSchema().getName());
            for (RowSet outputRowSet : outputRowSets) {
                // Look at the pre-determined distribution, decided at "transformation split" time.
                //
                int partNr = distribution.getPartition(outputRowSet.getRemoteSlaveServerName(), nextPartitionSchemaName, outputRowSet.getDestinationStepCopy());
                if (partNr < 0) {
                    throw new KettleStepException("Unable to find partition using rowset data, slave=" + outputRowSet.getRemoteSlaveServerName() + ", partition schema=" + nextStepPartitioningMeta.getPartitionSchema().getName() + ", copy=" + outputRowSet.getDestinationStepCopy());
                }
                partitionNrRowSetList[partNr] = outputRowSet;
            }
        }
        if (partitionNr < partitionNrRowSetList.length) {
            selectedRowSet = partitionNrRowSetList[partitionNr];
        } else {
            // Slots of the lookup array may be null; append(Object) renders those as "null" rather than
            // failing while the error message is being assembled.
            StringBuilder rowsets = new StringBuilder();
            for (RowSet rowSet : partitionNrRowSetList) {
                rowsets.append('[').append(rowSet).append("] ");
            }
            throw new KettleStepException("Internal error: the referenced partition nr '" + partitionNr + "' is higher than the maximum of '" + (partitionNrRowSetList.length - 1) + ".  The available row sets are: {" + rowsets + "}");
        }
        if (selectedRowSet == null) {
            logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
        } else {
            // Wait
            putRowToRowSet(selectedRowSet, rowMeta, row);
            incrementLinesWritten();
            if (log.isRowLevel()) {
                try {
                    logRowlevel("Partitioned #" + partitionNr + " to " + selectedRowSet + ", row=" + rowMeta.getString(row));
                } catch (KettleValueException e) {
                    throw new KettleStepException(e);
                }
            }
        }
    } else {
        // Local partitioning...
        // Put the row forward to the next step according to the partition rule.
        //
        // Count of partitioned row at one step
        int partCount = ((BasePartitioner) nextStepPartitioningMeta.getPartitioner()).getNrPartitions();
        for (int i = 0; i < nextSteps.length; i++) {
            selectedRowSet = outputRowSets.get(partitionNr + i * partCount);
            if (selectedRowSet == null) {
                logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
            } else {
                // Wait
                putRowToRowSet(selectedRowSet, rowMeta, row);
                incrementLinesWritten();
                if (log.isRowLevel()) {
                    try {
                        logRowlevel(BaseMessages.getString(PKG, "BaseStep.PartitionedToRow", partitionNr, selectedRowSet, rowMeta.getString(row)));
                    } catch (KettleValueException e) {
                        throw new KettleStepException(e);
                    }
                }
            }
        }
    }
}
Also used : BasePartitioner(org.pentaho.di.trans.BasePartitioner) KettleException(org.pentaho.di.core.exception.KettleException) KettleStepException(org.pentaho.di.core.exception.KettleStepException) SlaveStepCopyPartitionDistribution(org.pentaho.di.trans.SlaveStepCopyPartitionDistribution) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) ValueMetaString(org.pentaho.di.core.row.value.ValueMetaString) KettleValueException(org.pentaho.di.core.exception.KettleValueException)

Example 7 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseStep method noPartitioning.

/**
 * Passes a row on to the output row sets when no partitioning is in play.
 * <p>
 * When {@code distributed} is set, the row goes to exactly one target: either through the configured
 * {@code rowDistribution}, or to the row set at {@code currentOutputRowSetNr}, after which that index advances
 * and wraps around. When {@code distributed} is not set, every output row set receives the row: row sets
 * 1..n-1 each get a clone and row set 0 gets the original instance.
 *
 * @param rowMeta the metadata describing {@code row}
 * @param row the row data to pass on
 * @throws KettleStepException if the row cannot be cloned for one of the additional targets
 */
private void noPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
    if (!distributed) {
        // Copy mode: hand a clone to every output row set except the first one...
        for (int target = 1; target < outputRowSets.size(); target++) {
            RowSet copyTarget = outputRowSets.get(target);
            boolean bufferIsFilling = isUsingThreadPriorityManagment() && !copyTarget.isDone() && copyTarget.size() >= upperBufferBoundary && !isStopped();
            if (bufferIsFilling) {
                try {
                    Thread.sleep(0, 1);
                } catch (InterruptedException e) {
                    // Ignore sleep interruption exception
                }
            }
            try {
                Object[] rowCopy = rowMeta.cloneRow(row);
                putRowToRowSet(copyTarget, rowMeta, rowCopy);
                incrementLinesWritten();
            } catch (KettleValueException e) {
                throw new KettleStepException("Unable to clone row while copying rows to multiple target steps", e);
            }
        }
        // ...and the original row instance goes to the first output row set.
        putRowToRowSet(outputRowSets.get(0), rowMeta, row);
        incrementLinesWritten();
        return;
    }

    if (rowDistribution != null) {
        // Plugin defined row distribution!
        rowDistribution.distributeRow(rowMeta, row, this);
        incrementLinesWritten();
        return;
    }

    // ROUND ROBIN DISTRIBUTION:
    // The row goes to the output row set the running index currently points at.
    RowSet roundRobinTarget = outputRowSets.get(currentOutputRowSetNr);
    boolean bufferIsFilling = isUsingThreadPriorityManagment() && !roundRobinTarget.isDone() && roundRobinTarget.size() >= upperBufferBoundary && !isStopped();
    if (bufferIsFilling) {
        try {
            Thread.sleep(0, 1);
        } catch (InterruptedException e) {
            // Ignore sleep interruption exception
        }
    }
    putRowToRowSet(roundRobinTarget, rowMeta, row);
    incrementLinesWritten();

    // Move on to the next output row set, wrapping around after the last one.
    if (outputRowSets.size() > 1) {
        currentOutputRowSetNr++;
        if (currentOutputRowSetNr >= outputRowSets.size()) {
            currentOutputRowSetNr = 0;
        }
    }
}
Also used : KettleStepException(org.pentaho.di.core.exception.KettleStepException) RowSet(org.pentaho.di.core.RowSet) BlockingRowSet(org.pentaho.di.core.BlockingRowSet) KettleValueException(org.pentaho.di.core.exception.KettleValueException)

Example 8 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class BaseFileInputStep method filesFromPreviousStep.

/**
 * Collects the files to process from the rows delivered by the accepting step.
 * <p>
 * Each incoming row contributes the value of the accepting field as a file name; the resolved file object is
 * added to {@code data.files}. When pass-through fields are enabled, the complete incoming row is remembered
 * per file object. A file name that cannot be resolved is logged as an error and skipped.
 *
 * @return a one-element array holding the row metadata of the incoming rows when pass-through fields are
 *         enabled; {@code null} when pass-through is disabled, when the accepting field is missing, or when no
 *         file was collected
 * @throws KettleException declared by the original signature; may be raised by the calls made here
 */
private RowMetaInterface[] filesFromPreviousStep() throws KettleException {
    RowMetaInterface[] passThruMeta = null;
    data.files.getFiles().clear();

    int fieldIndex = -1;
    RowSet source = findInputRowSet(meta.inputFiles.acceptingStepName);
    for (Object[] incoming = getRowFrom(source); incoming != null; incoming = getRowFrom(source)) {
        RowMetaInterface incomingMeta = source.getRowMeta();

        // The first row is used to set up pass-through state and to locate the file name field.
        if (fieldIndex < 0) {
            if (meta.inputFiles.passingThruFields) {
                data.passThruFields = new HashMap<FileObject, Object[]>();
                passThruMeta = new RowMetaInterface[] { incomingMeta };
                data.nrPassThruFields = incomingMeta.size();
            }
            fieldIndex = incomingMeta.indexOfValue(meta.inputFiles.acceptingField);
            if (fieldIndex < 0) {
                logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToFindFilenameField", meta.inputFiles.acceptingField));
                setErrors(getErrors() + 1);
                stopAll();
                return null;
            }
        }

        String fileName = incomingMeta.getString(incoming, fieldIndex);
        try {
            FileObject resolved = KettleVFS.getFileObject(fileName, getTransMeta());
            data.files.addFile(resolved);
            if (meta.inputFiles.passingThruFields) {
                data.passThruFields.put(resolved, incoming);
            }
        } catch (KettleFileException e) {
            logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToCreateFileObject", fileName), e);
        }
    }

    if (data.files.nrOfFiles() != 0) {
        return passThruMeta;
    }
    if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "TextFileInput.Log.Error.NoFilesSpecified"));
    }
    return null;
}
Also used : KettleFileException(org.pentaho.di.core.exception.KettleFileException) RowSet(org.pentaho.di.core.RowSet) RowMetaInterface(org.pentaho.di.core.row.RowMetaInterface) FileObject(org.apache.commons.vfs2.FileObject)

Example 9 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class ExcelInput method processRow.

/**
 * Produces at most one output row per call from the configured workbooks.
 * <p>
 * On the first call the output row metadata is built and, when file names are accepted from another step, all
 * incoming rows are drained to collect those file names. Afterwards each call checks whether all files have
 * been handled or the row limit has been reached; if so the output is marked done. Otherwise the next row is
 * fetched, null values in fields flagged as repeated are filled from the previous row, and the row is passed
 * on.
 *
 * @param smi the step metadata, cast to {@code ExcelInputMeta}
 * @param sdi the step data, cast to {@code ExcelInputData}
 * @return {@code false} when processing is finished or the file name field could not be found, {@code true}
 *         otherwise (including calls in which no row was produced)
 * @throws KettleException if a file object cannot be created for an accepted file name, or one of the called
 *         operations fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    meta = (ExcelInputMeta) smi;
    data = (ExcelInputData) sdi;

    if (first) {
        first = false;
        // start from scratch!
        data.outputRowMeta = new RowMeta();
        meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

        if (meta.isAcceptingFilenames()) {
            // Read the files from the specified input stream...
            data.files.getFiles().clear();
            int fieldIndex = -1;
            RowSet source = findInputRowSet(meta.getAcceptingStepName());
            for (Object[] incoming = getRowFrom(source); incoming != null; incoming = getRowFrom(source)) {
                if (fieldIndex < 0) {
                    fieldIndex = source.getRowMeta().indexOfValue(meta.getAcceptingField());
                    if (fieldIndex < 0) {
                        logError(BaseMessages.getString(PKG, "ExcelInput.Error.FilenameFieldNotFound", "" + meta.getAcceptingField()));
                        setErrors(1);
                        stopAll();
                        return false;
                    }
                }
                String fileName = source.getRowMeta().getString(incoming, fieldIndex);
                try {
                    data.files.addFile(KettleVFS.getFileObject(fileName, getTransMeta()));
                } catch (KettleFileException e) {
                    throw new KettleException(BaseMessages.getString(PKG, "ExcelInput.Exception.CanNotCreateFileObject", fileName), e);
                }
            }
        }
        handleMissingFiles();
    }

    // We are done processing if the filenr >= number of files.
    if (data.filenr >= data.files.nrOfFiles()) {
        if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.NoMoreFiles", "" + data.filenr));
        }
        // signal end to receiver(s)
        setOutputDone();
        // end of data or error.
        return false;
    }

    // in this case we have to stop a row "earlier", since we start a row number 0 !!!
    if ((meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit())
        || (meta.readAllSheets() && meta.getRowLimit() > 0 && data.defaultStartRow == 0 && data.rownr > meta.getRowLimit() - 1)
        || (!meta.readAllSheets() && meta.getRowLimit() > 0 && data.startRow[data.sheetnr] == 0 && data.rownr > meta.getRowLimit() - 1)) {
        // The close of the openFile is in dispose()
        if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.RowLimitReached", "" + meta.getRowLimit()));
        }
        // signal end to receiver(s)
        setOutputDone();
        // end of data or error.
        return false;
    }

    Object[] outputRow = getRowFromWorkbooks();
    if (outputRow == null) {
        // Nothing to pass on this time; we continue though.
        return true;
    }

    incrementLinesInput();
    // OK, see if we need to repeat values.
    if (data.previousRow != null) {
        for (int col = 0; col < meta.getField().length; col++) {
            ValueMetaInterface columnMeta = data.outputRowMeta.getValueMeta(col);
            Object columnValue = outputRow[col];
            if (columnMeta.isNull(columnValue) && meta.getField()[col].isRepeated()) {
                // Take the value from the previous row.
                outputRow[col] = data.previousRow[col];
            }
        }
    }
    // Remember this row for the next time around!
    data.previousRow = data.outputRowMeta.cloneRow(outputRow);
    // Send out the good news: we found a row of data!
    putRow(data.outputRowMeta, outputRow);
    return true;
}
Also used : KettleException(org.pentaho.di.core.exception.KettleException) KettleFileException(org.pentaho.di.core.exception.KettleFileException) RowMeta(org.pentaho.di.core.row.RowMeta) RowSet(org.pentaho.di.core.RowSet) FileObject(org.apache.commons.vfs2.FileObject) ValueMetaInterface(org.pentaho.di.core.row.ValueMetaInterface)

Example 10 with RowSet

use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.

the class JoinRows method cacheInputRow.

/**
 * Reads one row from the current input stream and writes it to that stream's temp file.
 * <p>
 * Once every input stream has been drained, the step is switched from caching to reading back and the stream
 * index is reset to 0. While the number of rows seen for a stream stays within the configured cache size, the
 * row is also kept in the in-memory cache for that stream; once the count exceeds it, that cache is dropped.
 * When a stream delivers no more rows, its output streams are closed and the index moves to the next stream.
 *
 * @return {@code false} if the temp file for the current stream could not be opened, {@code true} otherwise
 * @throws KettleException declared by the original signature; may be raised by the calls made here
 */
private boolean cacheInputRow() throws KettleException {
    // The index only changes at the exit points of this method, so read it once.
    final int nr = data.filenr;

    if (nr >= data.file.length) {
        // Switch the mode to reading back from the data cache
        data.caching = false;
        // Start back at filenr = 0
        data.filenr = 0;
        return true;
    }

    // We need to open a new outputstream
    if (data.dataOutputStream[nr] == null) {
        try {
            // Open the temp file and wrap it in a data output stream...
            FileOutputStream tempFileStream = new FileOutputStream(data.file[nr]);
            data.fileOutputStream[nr] = tempFileStream;
            data.dataOutputStream[nr] = new DataOutputStream(data.fileOutputStream[nr]);
        } catch (FileNotFoundException fnfe) {
            logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToOpenOutputstream") + data.file[nr].toString() + "] : " + fnfe.toString());
            stopAll();
            setErrors(1);
            return false;
        }
    }

    // Read a line from the appropriate rowset...
    RowSet source = data.rs[nr];
    Object[] incoming = getRowFrom(source);

    if (incoming == null) {
        // This stream is exhausted: close its output streams.
        try {
            data.dataOutputStream[nr].close();
            data.fileOutputStream[nr].close();
            data.dataOutputStream[nr] = null;
            data.fileOutputStream[nr] = null;
        } catch (IOException ioe) {
            logError(BaseMessages.getString(PKG, "JoinRows.Log.ErrorInClosingOutputStream") + nr + " : [" + data.file[nr].toString() + "] : " + ioe.toString());
        }
        // Advance to the next file/input-stream...
        data.filenr++;
        return true;
    }

    if (data.fileRowMeta[nr] == null) {
        // The first row is used as meta-data, clone it for safety
        data.fileRowMeta[nr] = source.getRowMeta().clone();
    }
    data.fileRowMeta[nr].writeData(data.dataOutputStream[nr], incoming);
    data.size[nr]++;
    if (log.isRowLevel()) {
        logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromStreamN", nr, data.fileRowMeta[nr].getString(incoming)));
    }

    if (data.size[nr] > meta.getCacheSize()) {
        // we can't cope with this many rows: reset the cache...
        if (log.isDetailed()) {
            logDetailed(BaseMessages.getString(PKG, "JoinRows.Log.RowsFound", meta.getCacheSize() + "", data.rs[nr].getOriginStepName()));
        }
        data.cache[nr] = null;
        return true;
    }

    if (data.cache[nr] == null) {
        data.cache[nr] = new ArrayList<Object[]>();
    }
    // Add this row to the cache!
    data.cache[nr].add(incoming);
    return true;
}
Also used : DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) FileNotFoundException(java.io.FileNotFoundException) RowSet(org.pentaho.di.core.RowSet) IOException(java.io.IOException)

Aggregations

RowSet (org.pentaho.di.core.RowSet)109 Test (org.junit.Test)43 RowMetaInterface (org.pentaho.di.core.row.RowMetaInterface)40 RowMeta (org.pentaho.di.core.row.RowMeta)34 QueueRowSet (org.pentaho.di.core.QueueRowSet)26 ValueMetaString (org.pentaho.di.core.row.value.ValueMetaString)25 KettleException (org.pentaho.di.core.exception.KettleException)23 BlockingRowSet (org.pentaho.di.core.BlockingRowSet)21 KettleStepException (org.pentaho.di.core.exception.KettleStepException)19 ArrayList (java.util.ArrayList)16 StepInterface (org.pentaho.di.trans.step.StepInterface)13 ValueMetaInterface (org.pentaho.di.core.row.ValueMetaInterface)12 StepMeta (org.pentaho.di.trans.step.StepMeta)11 SingleRowRowSet (org.pentaho.di.core.SingleRowRowSet)10 ValueMetaInteger (org.pentaho.di.core.row.value.ValueMetaInteger)9 RowAdapter (org.pentaho.di.trans.step.RowAdapter)9 Matchers.anyString (org.mockito.Matchers.anyString)7 RowMetaAndData (org.pentaho.di.core.RowMetaAndData)7 IOException (java.io.IOException)6 ValueMetaNumber (org.pentaho.di.core.row.value.ValueMetaNumber)6