use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class JoinRows method getRowData.
/**
 * Get a row of data from the indicated rowset or buffer (memory/disk).
 * <p>
 * For {@code filenr == 0} the row is read from the first input row set. For any other index the row is taken from
 * the in-memory cache when {@code data.cache[filenr]} is set, and from the temporary file
 * {@code data.file[filenr]} otherwise. After the last buffered row has been handed out, the read position wraps
 * back to 0 and {@code data.restart[filenr]} is set to {@code true}.
 *
 * @param filenr
 *          The rowset or buffer to read a row from
 * @return a row of data, or {@code null} when the source has no rows or when an I/O problem occurred (in that case
 *         the error is logged, the error count is set and {@code stopAll()} is called)
 * @throws KettleException
 *           in case something goes wrong
 */
public Object[] getRowData(int filenr) throws KettleException {
  // Cleared on every call; only set again below when the buffer wraps around.
  data.restart[filenr] = false;
  Object[] rowData = null;

  // Do we read from the first rowset or a file?
  if (filenr == 0) {
    // Rowset 0:
    RowSet rowSet = getFirstInputRowSet();
    rowData = getRowFrom(rowSet);
    if (rowData != null) {
      data.fileRowMeta[0] = rowSet.getRowMeta();
    }
    if (log.isRowLevel()) {
      logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromStream") + (rowData == null ? "<null>" : data.fileRowMeta[0].getString(rowData)));
    }
  } else {
    if (data.cache[filenr] == null) {
      // No in-memory cache for this input: read from the temporary file.
      // See if we need to open the file?
      if (data.dataInputStream[filenr] == null) {
        try {
          data.fileInputStream[filenr] = new FileInputStream(data.file[filenr]);
          data.dataInputStream[filenr] = new DataInputStream(data.fileInputStream[filenr]);
        } catch (FileNotFoundException fnfe) {
          logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToFindOrOpenTemporaryFile") + data.file[filenr] + "] : " + fnfe.toString());
          setErrors(1);
          stopAll();
          return null;
        }
      }
      if (data.size[filenr] == 0) {
        if (log.isBasic()) {
          logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
        }
        return null;
      }
      try {
        rowData = data.fileRowMeta[filenr].readData(data.dataInputStream[filenr]);
      } catch (KettleFileException e) {
        // Include the cause in the log line, like the open/close handlers in this method do.
        logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "] : " + e.toString());
        setErrors(1);
        stopAll();
        return null;
      } catch (SocketTimeoutException e) {
        logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToReadDataFromTempFile") + filenr + " [" + data.file[filenr] + "] : " + e.toString());
        setErrors(1);
        stopAll();
        return null;
      }
      if (log.isRowLevel()) {
        logRowlevel(BaseMessages.getString(PKG, "JoinRows.Log.ReadRowFromFile") + filenr + " : " + data.fileRowMeta[filenr].getString(rowData));
      }
      data.position[filenr]++;
      // All rows were read: close the file and rewind.
      // The file will then be re-opened if needed later on.
      if (data.position[filenr] >= data.size[filenr]) {
        try {
          data.dataInputStream[filenr].close();
          data.fileInputStream[filenr].close();
          data.dataInputStream[filenr] = null;
          data.fileInputStream[filenr] = null;
          data.position[filenr] = 0;
          // indicate that we restarted.
          data.restart[filenr] = true;
        } catch (IOException ioe) {
          logError(BaseMessages.getString(PKG, "JoinRows.Log.UnableToCloseInputStream") + data.file[filenr] + "] : " + ioe.toString());
          setErrors(1);
          stopAll();
          return null;
        }
      }
    } else {
      // The rows of this input are cached in memory.
      if (data.size[filenr] == 0) {
        if (log.isBasic()) {
          logBasic(BaseMessages.getString(PKG, "JoinRows.Log.NoRowsComingFromStep") + data.rs[filenr].getOriginStepName() + "]");
        }
        return null;
      }
      // Index the position by the requested buffer (filenr), not by the shared data.filenr cursor:
      // the increment and wrap-around below operate on position[filenr] as well.
      rowData = data.cache[filenr].get(data.position[filenr]);
      // Don't forget to clone the data to protect it against data alteration downstream.
      //
      rowData = data.fileRowMeta[filenr].cloneRow(rowData);
      data.position[filenr]++;
      // Last cached row handed out: start again from the first one on the next call.
      if (data.position[filenr] >= data.size[filenr]) {
        data.position[filenr] = 0;
        // indicate that we restarted.
        data.restart[filenr] = true;
      }
    }
  }
  return rowData;
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class JoinRows method initialize.
/*
 * Set up the per-input bookkeeping arrays and create one temporary file for every input row set except the first.
 */
@SuppressWarnings("unchecked")
public void initialize() throws KettleException {
  // getRow() has not been called yet, so first wait until all input row sets are available to us.
  openRemoteInputStepSocketsOnce();
  try {
    // Begin by caching the data; writing happens later.
    data.caching = true;
    // Input 0 is skipped for speed, so the first file is number 1.
    data.filenr = 1;
    // When a main step is supplied, its row set is moved to position 0.
    swapFirstInputRowSetIfExists(meta.getMainStepname());

    List<RowSet> sources = getInputRowSets();
    int count = sources.size();

    // ** INPUT SIDE **
    data.file = new File[count];
    data.fileInputStream = new FileInputStream[count];
    data.dataInputStream = new DataInputStream[count];
    data.size = new int[count];
    data.fileRowMeta = new RowMetaInterface[count];
    data.joinrow = new Object[count][];
    data.rs = new RowSet[count];
    data.cache = new List[count];
    data.position = new int[count];
    data.fileOutputStream = new FileOutputStream[count];
    data.dataOutputStream = new DataOutputStream[count];
    data.restart = new boolean[count];

    for (int slot = 1; slot < count; slot++) {
      // A null parent directory is passed through to createTempFile as-is.
      String tempDirName = environmentSubstitute(meta.getDirectory());
      File tempDir = (tempDirName == null) ? null : new File(tempDirName);
      data.file[slot] = File.createTempFile(meta.getPrefix(), ".tmp", tempDir);

      data.rs[slot] = sources.get(slot);

      // Explicitly reset the state of this slot.
      data.size[slot] = 0;
      data.position[slot] = 0;
      data.restart[slot] = false;
      data.cache[slot] = null;
      data.joinrow[slot] = null;
      data.dataInputStream[slot] = null;
      data.dataOutputStream[slot] = null;
    }
  } catch (Exception e) {
    throw new KettleException(BaseMessages.getString(PKG, "JoinRows.Log.ErrorCreatingTemporaryFiles"), e);
  }
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class JoinRows method batchComplete.
/**
 * Processes whatever is still waiting in the first input row set, then calls {@code processRow} a number of extra
 * times derived from the sizes of the cached inputs.
 *
 * @throws KettleException
 *           when processing a row fails
 */
@Override
public void batchComplete() throws KettleException {
  RowSet firstRowSet = getFirstInputRowSet();

  // Multiply the sizes of all non-null caches. A running value of zero (the initial value, or the result of an
  // empty cache) is bumped to one before the next slot is considered.
  int extraCalls = 0;
  for (List<?> cached : data.cache) {
    if (extraCalls == 0) {
      extraCalls = 1;
    }
    if (cached != null) {
      extraCalls *= cached.size();
    }
  }

  // Drain the rows still pending on the first input, unless the step was stopped.
  while (firstRowSet.size() > 0 && !isStopped()) {
    processRow(meta, data);
  }

  // Then run processRow once per computed repeat.
  while (extraCalls-- > 0) {
    processRow(meta, data);
  }
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class MultiMergeJoin method processFirstRow.
/**
 * Prepares the join state for the first row: determines which input steps are connected through an enabled hop,
 * allocates the per-stream arrays on {@code data}, reads the first row of every such stream, resolves the key field
 * indexes and builds the output row metadata.
 *
 * @param smi
 *          the step metadata, cast to {@code MultiMergeJoinMeta}
 * @param sdi
 *          the step data, cast to {@code MultiMergeJoinData}
 * @return {@code false} when no input step has an enabled hop, {@code true} otherwise
 * @throws KettleException
 *           when a reference stream, hop, input row set or key field cannot be found
 */
private boolean processFirstRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  meta = (MultiMergeJoinMeta) smi;
  data = (MultiMergeJoinData) sdi;

  TransMeta owningTransMeta = getTransMeta();
  List<StreamInterface> infoStreams = meta.getStepIOMeta().getInfoStreams();
  StepMeta targetStepMeta = meta.getParentStepMeta();
  List<String> enabledStepNames = new ArrayList<String>();
  String[] inputStepNames = meta.getInputSteps();

  // Pass 1: keep only the input steps whose hop to this step is enabled.
  for (int s = 0; s < infoStreams.size(); s++) {
    String stepName = inputStepNames[s];
    StepMeta sourceStepMeta = infoStreams.get(s).getStepMeta();
    if (sourceStepMeta == null) {
      // should not arrive here, should typically have been caught by init.
      throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", stepName));
    }

    // check the hop
    TransHopMeta hop = owningTransMeta.findTransHop(sourceStepMeta, targetStepMeta, true);
    if (hop == null) {
      // there is no hop: this is unexpected.
      // should not arrive here, should typically have been caught by init.
      throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Log.UnableToFindReferenceStream", stepName));
    }
    if (hop.isEnabled()) {
      enabledStepNames.add(stepName);
    } else {
      logDetailed(BaseMessages.getString(PKG, "MultiMergeJoin.Log.IgnoringStep", stepName));
    }
  }

  int streamSize = enabledStepNames.size();
  if (streamSize == 0) {
    return false;
  }

  // Allocate the per-stream state.
  data.rowSets = new RowSet[streamSize];
  data.rows = new Object[streamSize][];
  data.metas = new RowMetaInterface[streamSize];
  data.rowLengths = new int[streamSize];
  data.queue = new PriorityQueue<MultiMergeJoinData.QueueEntry>(streamSize, new MultiMergeJoinData.QueueComparator(data));
  data.results = new ArrayList<List<Object[]>>(streamSize);
  data.queueEntries = new MultiMergeJoinData.QueueEntry[streamSize];
  data.drainIndices = new int[streamSize];
  data.keyNrs = new int[streamSize][];
  data.dummy = new Object[streamSize][];
  data.outputRowMeta = new RowMeta();

  // Pass 2: "source" walks all configured input steps, "slot" only advances for the enabled ones.
  int slot = 0;
  for (int source = 0; source < inputStepNames.length; source++) {
    String stepName = inputStepNames[source];
    if (!enabledStepNames.contains(stepName)) {
      // ignore step with disabled hop.
      continue;
    }

    MultiMergeJoinData.QueueEntry entry = new MultiMergeJoinData.QueueEntry();
    entry.index = slot;
    data.queueEntries[slot] = entry;
    data.results.add(new ArrayList<Object[]>());

    RowSet sourceRowSet = findInputRowSet(stepName);
    if (sourceRowSet == null) {
      throw new KettleException(BaseMessages.getString(PKG, "MultiMergeJoin.Exception.UnableToFindSpecifiedStep", stepName));
    }
    data.rowSets[slot] = sourceRowSet;

    Object[] firstRow = getRowFrom(sourceRowSet);
    data.rows[slot] = firstRow;

    RowMetaInterface streamMeta;
    if (firstRow != null) {
      entry.row = firstRow;
      streamMeta = sourceRowSet.getRowMeta();

      // The key fields of one stream are stored as a single comma separated string.
      String[] keyNames = meta.getKeyFields()[source].split(",");
      int[] keyIndexes = new int[keyNames.length];
      data.keyNrs[slot] = keyIndexes;
      for (int k = 0; k < keyNames.length; k++) {
        String keyName = keyNames[k];
        keyIndexes[k] = streamMeta.indexOfValue(keyName);
        if (keyIndexes[k] < 0) {
          String message = BaseMessages.getString(PKG, "MultiMergeJoin.Exception.UnableToFindFieldInReferenceStream", keyName, stepName);
          logError(message);
          throw new KettleStepException(message);
        }
      }

      // The metadata is stored before the entry is queued.
      data.metas[slot] = streamMeta;
      data.queue.add(entry);
    } else {
      // No row arrived from this stream: take its layout from the transformation metadata instead.
      streamMeta = getTransMeta().getStepFields(stepName);
      data.metas[slot] = streamMeta;
    }

    data.outputRowMeta.mergeRowMeta(streamMeta.clone());
    data.rowLengths[slot] = streamMeta.size();
    data.dummy[slot] = RowDataUtil.allocateRowData(streamMeta.size());
    slot++;
  }
  return true;
}
use of org.pentaho.di.core.RowSet in project pentaho-kettle by pentaho.
the class BaseStep method handleGetRow.
/**
 * Reads the next row from this step's input row sets, moving from one row set to the next when the current one
 * yields nothing or when enough rows were read from it.
 * <p>
 * Returns {@code null} when the step is stopped or when no input row sets are left. Row listeners are notified
 * and the rejection rates are verified before returning.
 *
 * @return the row that was read, or {@code null} if there is nothing (more) to read
 * @throws KettleException
 *           when the thread is interrupted while the step is paused, or when a called helper fails
 */
private Object[] handleGetRow() throws KettleException {
// While the step is paused (and not stopped), poll the flags every 100 ms.
//
while (paused.get() && !stopped.get()) {
try {
Thread.sleep(100);
} catch (InterruptedException e) {
// An interrupt while paused is reported to the caller as a step exception.
throw new KettleStepException(e);
}
}
// Stopped: propagate the stop and report that there are no more rows.
if (stopped.get()) {
if (log.isDebug()) {
logDebug(BaseMessages.getString(PKG, "BaseStep.Log.StopLookingForMoreRows"));
}
stopAll();
return null;
}
// Small startup check
//
waitUntilTransformationIsStarted();
// See if we need to open sockets to remote input steps...
//
openRemoteInputStepSocketsOnce();
RowSet inputRowSet = null;
Object[] row = null;
// Everything that inspects or modifies the list of input row sets happens under this lock.
synchronized (inputRowSetsLock) {
// No input row sets (left): nothing to read.
//
if (inputRowSets.isEmpty()) {
return null;
}
// Do we need to switch to the next input stream?
if (blockPointer >= NR_OF_ROWS_IN_BLOCK) {
// A full block was read from the current input: visit the row sets in turn (at most once each)
// and take the first row that getRowImmediate() hands back.
// NOTE(review): blockPointer is not reset here; presumably nextInputStream() does that - confirm.
//
for (int r = 0; r < inputRowSets.size() && row == null; r++) {
nextInputStream();
inputRowSet = currentInputStream();
row = inputRowSet.getRowImmediate();
}
if (row != null) {
incrementLinesRead();
}
} else {
// What's the current input stream?
inputRowSet = currentInputStream();
}
// With thread priority management enabled, sleep for the shortest possible time when the current
// row set is not done and holds at most lowerBufferBoundary rows.
// NOTE(review): presumably this gives the producing step a chance to fill the buffer - confirm.
//
if (isUsingThreadPriorityManagment() && !inputRowSet.isDone() && inputRowSet.size() <= lowerBufferBoundary && !isStopped()) {
try {
Thread.sleep(0, 1);
} catch (InterruptedException e) {
// Ignore sleep interruption exception
}
}
// Main read loop: keep polling the input row sets until a row shows up or the step is stopped.
//
while (row == null && !isStopped()) {
// Get a row from the input in row set ...
// Timeout immediately if nothing is there to read.
// We will then switch to the next row set to read from...
//
row = inputRowSet.getRowWait(1, TimeUnit.MILLISECONDS);
if (row != null) {
incrementLinesRead();
blockPointer++;
} else {
// Nothing arrived within the timeout. If the row set reports done, try one more time; if it is
// still empty, drop it from the list of inputs.
//
if (inputRowSet.isDone()) {
row = inputRowSet.getRowWait(1, TimeUnit.MILLISECONDS);
if (row == null) {
inputRowSets.remove(currentInputRowSetNr);
if (inputRowSets.isEmpty()) {
// We're completely done.
return null;
}
} else {
incrementLinesRead();
}
}
// Move on to the next input, also when the retry above produced a row.
nextInputStream();
inputRowSet = currentInputStream();
}
}
// Only entered when the loop above ended without a row (isStopped() returned true) while
// stopped.get() is still false.
// NOTE(review): whether those two can actually differ is not visible here - confirm.
//
while (row == null && !stopped.get()) {
// Try the next input row set; give up when none are left.
//
if (inputRowSets.isEmpty()) {
// We're done.
return null;
}
nextInputStream();
inputRowSet = currentInputStream();
row = getRowFrom(inputRowSet);
}
}
// Set the row metadata on the first occurrence,
// or if prevSteps.length > 1 inputRowMeta can be changed
// NOTE(review): inputRowSet is the row set selected last; after a successful "done" retry above it has
// already been advanced, so it may not be the row set the row came from - confirm.
if (inputRowMeta == null || prevSteps.length > 1) {
inputRowMeta = inputRowSet.getRowMeta();
}
if (row != null) {
// In safe mode, run the static row mixing check before handing out the row.
//
if (trans.isSafeModeEnabled()) {
transMeta.checkRowMixingStatically(stepMeta, null);
}
// Tell every registered row listener about the row that was read.
for (RowListener listener : rowListeners) {
listener.rowReadEvent(inputRowMeta, row);
}
}
// Check the rejection rates etc. as well.
verifyRejectionRates();
return row;
}
Aggregations