use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class BaseStep method specialPartitioning.
/**
 * Routes a row to the output row set(s) that belong to the partition calculated for that row.
 *
 * <p>The partition number is obtained from the partitioning metadata of the next step. When a
 * non-empty slave-step-copy distribution is present on the transformation metadata, the row is
 * sent to the single row set mapped to that partition (clustered partitioning). Otherwise the row
 * is sent to one row set per entry of {@code nextSteps}, indexed by partition number (local
 * partitioning).
 *
 * @param rowMeta the metadata describing {@code row}
 * @param row the row data to forward
 * @throws KettleStepException if no partitioning metadata is available for the next step, if the
 *         partition number cannot be calculated, if a row set cannot be mapped to a partition, if
 *         the partition number exceeds the known row sets, or if the row cannot be rendered for
 *         row-level logging
 */
private void specialPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
  if (nextStepPartitioningMeta == null) {
    // Look up the partitioning of the next step.
    // This is the case for non-clustered partitioning...
    //
    List<StepMeta> followingSteps = transMeta.findNextSteps(stepMeta);
    if (followingSteps.size() > 0) {
      nextStepPartitioningMeta = followingSteps.get(0).getStepPartitioningMeta();
    }
    // Fail with a descriptive step exception instead of a NullPointerException further down.
    if (nextStepPartitioningMeta == null) {
      throw new KettleStepException("Unable to determine the partitioning of the next step: no partitioning information is available");
    }
  }

  int partitionNr;
  try {
    partitionNr = nextStepPartitioningMeta.getPartition(rowMeta, row);
  } catch (KettleException e) {
    throw new KettleStepException("Unable to convert a value to integer while calculating the partition number", e);
  }

  RowSet selectedRowSet = null;
  if (clusteredPartitioningFirst) {
    clusteredPartitioningFirst = false;
    // We are only running remotely if the distribution is there AND it actually contains something.
    // This is evaluated once; the outcome is remembered in clusteredPartitioning.
    //
    clusteredPartitioning = transMeta.getSlaveStepCopyPartitionDistribution() != null
        && !transMeta.getSlaveStepCopyPartitionDistribution().getDistribution().isEmpty();
  }

  if (clusteredPartitioning) {
    if (partitionNrRowSetList == null) {
      partitionNrRowSetList = new RowSet[outputRowSets.size()];
      // The distribution is calculated during transformation split.
      // The slave-step-copy distribution is passed onto the slave transformation.
      //
      SlaveStepCopyPartitionDistribution distribution = transMeta.getSlaveStepCopyPartitionDistribution();
      String nextPartitionSchemaName =
          TransSplitter.createPartitionSchemaNameFromTarget(nextStepPartitioningMeta.getPartitionSchema().getName());
      for (RowSet outputRowSet : outputRowSets) {
        // Look at the pre-determined distribution, decided at "transformation split" time.
        //
        int partNr = distribution.getPartition(outputRowSet.getRemoteSlaveServerName(), nextPartitionSchemaName,
            outputRowSet.getDestinationStepCopy());
        if (partNr < 0) {
          throw new KettleStepException("Unable to find partition using rowset data, slave=" + outputRowSet.getRemoteSlaveServerName() + ", partition schema=" + nextStepPartitioningMeta.getPartitionSchema().getName() + ", copy=" + outputRowSet.getDestinationStepCopy());
        }
        partitionNrRowSetList[partNr] = outputRowSet;
      }
    }

    if (partitionNr < partitionNrRowSetList.length) {
      selectedRowSet = partitionNrRowSetList[partitionNr];
    } else {
      // Slots may be unassigned (null); append(Object) renders those as "null" instead of
      // failing with a NullPointerException while building the error message.
      StringBuilder rowsets = new StringBuilder();
      for (RowSet rowSet : partitionNrRowSetList) {
        rowsets.append("[").append(rowSet).append("] ");
      }
      throw new KettleStepException("Internal error: the referenced partition nr '" + partitionNr + "' is higher than the maximum of '" + (partitionNrRowSetList.length - 1) + "'. The available row sets are: {" + rowsets + "}");
    }

    if (selectedRowSet == null) {
      logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
    } else {
      // Wait
      putRowToRowSet(selectedRowSet, rowMeta, row);
      incrementLinesWritten();
      if (log.isRowLevel()) {
        try {
          logRowlevel("Partitioned #" + partitionNr + " to " + selectedRowSet + ", row=" + rowMeta.getString(row));
        } catch (KettleValueException e) {
          throw new KettleStepException(e);
        }
      }
    }
  } else {
    // Local partitioning...
    // Put the row forward to the next step according to the partition rule.
    //
    // Count of partitioned row at one step
    int partCount = ((BasePartitioner) nextStepPartitioningMeta.getPartitioner()).getNrPartitions();
    // NOTE(review): nextSteps here is a member declared outside this method (presumably an
    // array of the following steps) - confirm against the enclosing class.
    for (int i = 0; i < nextSteps.length; i++) {
      // The row sets are addressed in blocks of partCount, one block per following step.
      selectedRowSet = outputRowSets.get(partitionNr + i * partCount);
      if (selectedRowSet == null) {
        logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
      } else {
        // Wait
        putRowToRowSet(selectedRowSet, rowMeta, row);
        incrementLinesWritten();
        if (log.isRowLevel()) {
          try {
            logRowlevel(BaseMessages.getString(PKG, "BaseStep.PartitionedToRow", partitionNr, selectedRowSet, rowMeta.getString(row)));
          } catch (KettleValueException e) {
            throw new KettleStepException(e);
          }
        }
      }
    }
  }
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class BaseStep method noPartitioning.
/**
 * Forwards a row to the output row sets when no partitioning is involved.
 *
 * <p>In copy mode (not distributed) every output row set receives the row: row sets 1..n-1 get a
 * clone and row set 0 gets the original instance. In distributed mode the row goes either through
 * the configured row distribution or, without one, to the current output row set, after which the
 * current row set number advances and wraps around.
 *
 * @param rowMeta the metadata describing {@code row}
 * @param row the row data to forward
 * @throws KettleStepException if the row cannot be cloned for one of the copy targets
 */
private void noPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
  if (!distributed) {
    // Copy mode: hand clones to every row set except the first (index starts at 1).
    for (int target = 1; target < outputRowSets.size(); target++) {
      RowSet copyTarget = outputRowSets.get(target);

      boolean bufferIsFull = isUsingThreadPriorityManagment() && !copyTarget.isDone()
          && copyTarget.size() >= upperBufferBoundary && !isStopped();
      if (bufferIsFull) {
        try {
          Thread.sleep(0, 1);
        } catch (InterruptedException e) {
          // Ignore sleep interruption exception
        }
      }

      try {
        // Loop until we find room in the target rowset
        putRowToRowSet(copyTarget, rowMeta, rowMeta.cloneRow(row));
        incrementLinesWritten();
      } catch (KettleValueException e) {
        throw new KettleStepException("Unable to clone row while copying rows to multiple target steps", e);
      }
    }

    // The first output row set receives the original row instance.
    RowSet firstTarget = outputRowSets.get(0);
    putRowToRowSet(firstTarget, rowMeta, row);
    incrementLinesWritten();
    return;
  }

  if (rowDistribution != null) {
    // Plugin defined row distribution!
    rowDistribution.distributeRow(rowMeta, row, this);
    incrementLinesWritten();
    return;
  }

  // ROUND ROBIN DISTRIBUTION:
  // the row goes to the "current" output row set, then the cursor moves on.
  RowSet currentTarget = outputRowSets.get(currentOutputRowSetNr);

  boolean bufferIsFull = isUsingThreadPriorityManagment() && !currentTarget.isDone()
      && currentTarget.size() >= upperBufferBoundary && !isStopped();
  if (bufferIsFull) {
    try {
      Thread.sleep(0, 1);
    } catch (InterruptedException e) {
      // Ignore sleep interruption exception
    }
  }

  // Loop until we find room in the target rowset
  putRowToRowSet(currentTarget, rowMeta, row);
  incrementLinesWritten();

  // With a single row set there is nothing to rotate.
  if (outputRowSets.size() > 1) {
    currentOutputRowSetNr++;
    if (currentOutputRowSetNr >= outputRowSets.size()) {
      currentOutputRowSetNr = 0;
    }
  }
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class BaseFileInputStep method filesFromPreviousStep.
/**
 * Collects the files to read from the rows delivered by the accepting (previous) step.
 *
 * <p>Every incoming row contributes one file, taken from the accepting field. When fields are
 * passed through, the incoming row is remembered per file and the row metadata of the input is
 * returned.
 *
 * @return a one-element array with the input row metadata when fields are passed through and at
 *         least one file was collected; {@code null} when the accepting field is missing, when no
 *         file was collected, or when fields are not passed through
 * @throws KettleException if reading from the input row set or converting the file name fails
 */
private RowMetaInterface[] filesFromPreviousStep() throws KettleException {
  RowMetaInterface[] passThruMeta = null;
  data.files.getFiles().clear();

  int fieldIndex = -1;
  RowSet inputRowSet = findInputRowSet(meta.inputFiles.acceptingStepName);

  for (Object[] incoming = getRowFrom(inputRowSet); incoming != null; incoming = getRowFrom(inputRowSet)) {
    RowMetaInterface incomingMeta = inputRowSet.getRowMeta();

    // Resolve the accepting field only once, on the first row.
    if (fieldIndex < 0) {
      if (meta.inputFiles.passingThruFields) {
        data.passThruFields = new HashMap<FileObject, Object[]>();
        passThruMeta = new RowMetaInterface[] { incomingMeta };
        data.nrPassThruFields = incomingMeta.size();
      }
      fieldIndex = incomingMeta.indexOfValue(meta.inputFiles.acceptingField);
      if (fieldIndex < 0) {
        logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToFindFilenameField", meta.inputFiles.acceptingField));
        setErrors(getErrors() + 1);
        stopAll();
        return null;
      }
    }

    String filename = incomingMeta.getString(incoming, fieldIndex);
    try {
      FileObject file = KettleVFS.getFileObject(filename, getTransMeta());
      data.files.addFile(file);
      if (meta.inputFiles.passingThruFields) {
        data.passThruFields.put(file, incoming);
      }
    } catch (KettleFileException e) {
      // A file that cannot be resolved is logged and skipped; the remaining rows are still read.
      logError(BaseMessages.getString(PKG, "TextFileInput.Log.Error.UnableToCreateFileObject", filename), e);
    }
  }

  if (data.files.nrOfFiles() != 0) {
    return passThruMeta;
  }

  if (log.isDetailed()) {
    logDetailed(BaseMessages.getString(PKG, "TextFileInput.Log.Error.NoFilesSpecified"));
  }
  return null;
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class ExcelInput method processRow.
/**
 * Produces at most one output row per call from the configured workbooks.
 *
 * <p>On the first call the output row metadata is built and, when file names are accepted from
 * another step, the file list is filled from that step's rows. Processing ends once all files
 * have been handled or the row limit has been reached.
 *
 * @param smi the step metadata, expected to be an {@code ExcelInputMeta}
 * @param sdi the step data, expected to be an {@code ExcelInputData}
 * @return {@code true} to be called again, {@code false} when processing is finished or failed
 * @throws KettleException if a file object cannot be created or reading a row fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  meta = (ExcelInputMeta) smi;
  data = (ExcelInputData) sdi;

  if (first) {
    first = false;

    // start from scratch!
    data.outputRowMeta = new RowMeta();
    meta.getFields(data.outputRowMeta, getStepname(), null, null, this, repository, metaStore);

    if (meta.isAcceptingFilenames()) {
      // Read the files from the specified input stream...
      data.files.getFiles().clear();
      int filenameIndex = -1;
      RowSet filenameRowSet = findInputRowSet(meta.getAcceptingStepName());

      for (Object[] filenameRow = getRowFrom(filenameRowSet); filenameRow != null; filenameRow = getRowFrom(filenameRowSet)) {
        // The position of the file name field is looked up on the first row only.
        if (filenameIndex < 0) {
          filenameIndex = filenameRowSet.getRowMeta().indexOfValue(meta.getAcceptingField());
          if (filenameIndex < 0) {
            logError(BaseMessages.getString(PKG, "ExcelInput.Error.FilenameFieldNotFound", "" + meta.getAcceptingField()));
            setErrors(1);
            stopAll();
            return false;
          }
        }

        String filename = filenameRowSet.getRowMeta().getString(filenameRow, filenameIndex);
        try {
          data.files.addFile(KettleVFS.getFileObject(filename, getTransMeta()));
        } catch (KettleFileException e) {
          throw new KettleException(BaseMessages.getString(PKG, "ExcelInput.Exception.CanNotCreateFileObject", filename), e);
        }
      }
    }
    handleMissingFiles();
  }

  // We are done processing if the filenr >= number of files.
  if (data.filenr >= data.files.nrOfFiles()) {
    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.NoMoreFiles", "" + data.filenr));
    }
    // signal end to receiver(s)
    setOutputDone();
    // end of data or error.
    return false;
  }

  // Row limit check. When reading starts at row number 0 we have to stop one row "earlier".
  // The expression is kept in its original short-circuit order: the per-sheet start row is only
  // looked up when not all sheets are read.
  if ((meta.getRowLimit() > 0 && data.rownr > meta.getRowLimit())
      || (meta.readAllSheets() && meta.getRowLimit() > 0 && data.defaultStartRow == 0 && data.rownr > meta.getRowLimit() - 1)
      || (!meta.readAllSheets() && meta.getRowLimit() > 0 && data.startRow[data.sheetnr] == 0 && data.rownr > meta.getRowLimit() - 1)) {
    // The close of the openFile is in dispose()
    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "ExcelInput.Log.RowLimitReached", "" + meta.getRowLimit()));
    }
    // signal end to receiver(s)
    setOutputDone();
    // end of data or error.
    return false;
  }

  Object[] outputRow = getRowFromWorkbooks();
  if (outputRow == null) {
    // No row this time around; we continue though.
    return true;
  }

  incrementLinesInput();

  // OK, see if we need to repeat values.
  if (data.previousRow != null) {
    for (int field = 0; field < meta.getField().length; field++) {
      ValueMetaInterface fieldMeta = data.outputRowMeta.getValueMeta(field);
      if (fieldMeta.isNull(outputRow[field]) && meta.getField()[field].isRepeated()) {
        // Take the value from the previous row.
        outputRow[field] = data.previousRow[field];
      }
    }
  }

  // Remember this row for the next time around!
  data.previousRow = data.outputRowMeta.cloneRow(outputRow);

  // Send out the good news: we found a row of data!
  putRow(data.outputRowMeta, outputRow);
  return true;
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class JoinRows method cacheInputRow.
/**
 * Reads one row from the current input stream and stores it in that stream's temp file and, as
 * long as the configured cache size is not exceeded, in the in-memory cache.
 *
 * <p>When the current input is exhausted its output streams are closed and the next input becomes
 * current. Once all inputs have been handled, caching mode is switched off and the file number is
 * reset to 0.
 *
 * @return {@code false} when the temp file cannot be opened, {@code true} otherwise
 * @throws KettleException if reading the row or writing it to the temp file fails
 */
private boolean cacheInputRow() throws KettleException {
  if (data.filenr >= data.file.length) {
    // Switch the mode to reading back from the data cache
    data.caching = false;
    // Start back at filenr = 0
    data.filenr = 0;
    return true;
  }

  // Index of the input stream / temp file handled by this call.
  final int nr = data.filenr;

  // We need to open a new outputstream
  if (data.dataOutputStream[nr] == null) {
    try {
      // Open the temp file
      data.fileOutputStream[nr] = new FileOutputStream(data.file[nr]);
      // Open the data output stream...
      data.dataOutputStream[nr] = new DataOutputStream(data.fileOutputStream[nr]);
    } catch (FileNotFoundException fnfe) {
      logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToOpenOutputstream") + data.file[nr].toString() + "] : " + fnfe.toString());
      stopAll();
      setErrors(1);
      return false;
    }
  }

  // Read a line from the appropriate rowset...
  RowSet source = data.rs[nr];
  Object[] cachedRow = getRowFrom(source);

  if (cachedRow == null) {
    // This input is exhausted: close outputstream.
    try {
      data.dataOutputStream[nr].close();
      data.fileOutputStream[nr].close();
      data.dataOutputStream[nr] = null;
      data.fileOutputStream[nr] = null;
    } catch (IOException ioe) {
      logError(BaseMessages.getString(PKG, "JoinRows.Log.ErrorInClosingOutputStream") + nr + " : [" + data.file[nr].toString() + "] : " + ioe.toString());
    }
    // Advance to the next file/input-stream...
    data.filenr++;
    return true;
  }

  if (data.fileRowMeta[nr] == null) {
    // The first row is used as meta-data, clone it for safety
    data.fileRowMeta[nr] = source.getRowMeta().clone();
  }
  data.fileRowMeta[nr].writeData(data.dataOutputStream[nr], cachedRow);
  data.size[nr]++;

  if (log.isRowLevel()) {
    logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromStreamN", nr, data.fileRowMeta[nr].getString(cachedRow)));
  }

  if (data.size[nr] > meta.getCacheSize()) {
    // we can't cope with this many rows: reset the cache...
    if (log.isDetailed()) {
      logDetailed(BaseMessages.getString(PKG, "JoinRows.Log.RowsFound", meta.getCacheSize() + "", source.getOriginStepName()));
    }
    data.cache[nr] = null;
  } else {
    if (data.cache[nr] == null) {
      data.cache[nr] = new ArrayList<Object[]>();
    }
    // Add this row to the cache!
    data.cache[nr].add(cachedRow);
  }
  return true;
}
Aggregations