use of org.pentaho.di.core.BlockingRowSet in project pentaho-kettle by pentaho.
the class Trans method addRowProducer.
/**
* This adds a row producer to the transformation that just got set up. It is preferable to run this BEFORE execute()
* but after prepareExecution()
*
* @param stepname The step to produce rows for
* @param copynr The copynr of the step to produce row for (normally 0 unless you have multiple copies running)
* @return the row producer
* @throws KettleException in case the thread/step to produce rows for could not be found.
* @see Trans#execute(String[])
* @see Trans#prepareExecution(String[])
*/
public RowProducer addRowProducer(String stepname, int copynr) throws KettleException {
StepInterface stepInterface = getStepInterface(stepname, copynr);
if (stepInterface == null) {
throw new KettleException("Unable to find thread with name " + stepname + " and copy number " + copynr);
}
// We are going to add an extra RowSet to this stepInterface.
RowSet rowSet;
switch(transMeta.getTransformationType()) {
case Normal:
rowSet = new BlockingRowSet(transMeta.getSizeRowset());
break;
case SerialSingleThreaded:
rowSet = new SingleRowRowSet();
break;
case SingleThreaded:
rowSet = new QueueRowSet();
break;
default:
throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
}
// Add this rowset to the list of active rowsets for the selected step
stepInterface.addRowSetToInputRowSets(rowSet);
return new RowProducer(stepInterface, rowSet);
}
use of org.pentaho.di.core.BlockingRowSet in project pentaho-kettle by pentaho.
the class Trans method prepareExecution.
/**
* Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
* and tracking the steps and hops in the transformation.
*
* @param arguments the arguments to use for this transformation
* @throws KettleException in case the transformation could not be prepared (initialized)
*/
public void prepareExecution(String[] arguments) throws KettleException {
setPreparing(true);
startDate = null;
setRunning(false);
log.snap(Metrics.METRIC_TRANSFORMATION_EXECUTION_START);
log.snap(Metrics.METRIC_TRANSFORMATION_INIT_START);
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationPrepareExecution.id, this);
checkCompatibility();
//
if (arguments != null) {
setArguments(arguments);
}
if (parentTrans != null) {
IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentTrans, log));
} else {
// If there is no parent, one of these still needs to be called to instantiate a new cache
IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentJob, log));
}
activateParameters();
transMeta.activateParameters();
ConnectionUtil.init(transMeta);
if (transMeta.getName() == null) {
if (transMeta.getFilename() != null) {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
}
} else {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
}
if (getArguments() != null) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
}
}
if (isSafeModeEnabled()) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
}
}
if (getReplayDate() != null) {
SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
} else {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
}
}
//
if (servletPrintWriter == null) {
String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
if (encoding == null) {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
} else {
try {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
} catch (UnsupportedEncodingException ex) {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
}
}
}
// Keep track of all the row sets and allocated steps
//
steps = new ArrayList<>();
rowsets = new ArrayList<>();
List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
}
//
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta thisStep = hopsteps.get(i);
if (thisStep.isMapping()) {
// handled and allocated by the mapping step itself.
continue;
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
}
List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
int nrTargets = nextSteps.size();
for (int n = 0; n < nrTargets; n++) {
// What's the next step?
StepMeta nextStep = nextSteps.get(n);
if (nextStep.isMapping()) {
// handled and allocated by the mapping step itself.
continue;
}
// How many times do we start the source step?
int thisCopies = thisStep.getCopies();
if (thisCopies < 0) {
//
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
}
// How many times do we start the target step?
int nextCopies = nextStep.getCopies();
// Are we re-partitioning?
boolean repartitioning;
if (thisStep.isPartitioned()) {
repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
} else {
repartitioning = nextStep.isPartitioned();
}
int nrCopies;
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
}
int dispatchType;
if (thisCopies == 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_1_1;
nrCopies = 1;
} else if (thisCopies == 1 && nextCopies > 1) {
dispatchType = TYPE_DISP_1_N;
nrCopies = nextCopies;
} else if (thisCopies > 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_N_1;
nrCopies = thisCopies;
} else if (thisCopies == nextCopies && !repartitioning) {
dispatchType = TYPE_DISP_N_N;
nrCopies = nextCopies;
} else {
// > 1!
dispatchType = TYPE_DISP_N_M;
nrCopies = nextCopies;
}
//
if (dispatchType != TYPE_DISP_N_M) {
for (int c = 0; c < nrCopies; c++) {
RowSet rowSet;
switch(transMeta.getTransformationType()) {
case Normal:
// This is a temporary patch until the batching rowset has proven
// to be working in all situations.
// Currently there are stalling problems when dealing with small
// amounts of rows.
//
Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
if (batchingRowSet != null && batchingRowSet.booleanValue()) {
rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
} else {
rowSet = new BlockingRowSet(transMeta.getSizeRowset());
}
break;
case SerialSingleThreaded:
rowSet = new SingleRowRowSet();
break;
case SingleThreaded:
rowSet = new QueueRowSet();
break;
default:
throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
}
switch(dispatchType) {
case TYPE_DISP_1_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
break;
case TYPE_DISP_1_N:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
break;
case TYPE_DISP_N_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
break;
case TYPE_DISP_N_N:
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
break;
default:
break;
}
rowsets.add(rowSet);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
}
}
} else {
// distribution...
for (int s = 0; s < thisCopies; s++) {
for (int t = 0; t < nextCopies; t++) {
BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
rowsets.add(rowSet);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
}
}
}
}
}
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
}
//
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta stepMeta = hopsteps.get(i);
String stepid = stepMeta.getStepID();
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
}
// How many copies are launched of this step?
int nrCopies = stepMeta.getCopies();
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
}
// At least run once...
for (int c = 0; c < nrCopies; c++) {
// Make sure we haven't started it yet!
if (!hasStepStarted(stepMeta.getName(), c)) {
StepMetaDataCombi combi = new StepMetaDataCombi();
combi.stepname = stepMeta.getName();
combi.copy = c;
// The meta-data
combi.stepMeta = stepMeta;
combi.meta = stepMeta.getStepMetaInterface();
// Allocate the step data
StepDataInterface data = combi.meta.getStepData();
combi.data = data;
// Allocate the step
StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
// Copy the variables of the transformation to the step...
// don't share. Each copy of the step has its own variables.
//
step.initializeVariablesFrom(this);
step.setUsingThreadPriorityManagment(transMeta.isUsingThreadPriorityManagment());
// Pass the connected repository & metaStore to the steps runtime
//
step.setRepository(repository);
step.setMetaStore(metaStore);
// things as well...
if (stepMeta.isPartitioned()) {
List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
if (partitionIDs != null && !partitionIDs.isEmpty()) {
// Pass the partition ID
step.setPartitionID(partitionIDs.get(c));
// to the step
}
}
// Save the step too
combi.step = step;
// /
if (combi.step instanceof LoggingObjectInterface) {
LogChannelInterface logChannel = combi.step.getLogChannel();
logChannel.setLogLevel(logLevel);
logChannel.setGatheringMetrics(log.isGatheringMetrics());
}
// Add to the bunch...
steps.add(combi);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
}
}
}
}
//
for (int s = 0; s < steps.size(); s++) {
StepMetaDataCombi combi = steps.get(s);
if (combi.stepMeta.isDoingErrorHandling()) {
combi.step.identifyErrorOutput();
}
}
// Now (optionally) write start log record!
// Make sure we synchronize appropriately to avoid duplicate batch IDs.
//
Object syncObject = this;
if (parentJob != null) {
// parallel execution in a job
syncObject = parentJob;
}
if (parentTrans != null) {
// multiple sub-transformations
syncObject = parentTrans;
}
synchronized (syncObject) {
calculateBatchIdAndDateRange();
beginProcessing();
}
//
for (int i = 0; i < steps.size(); i++) {
StepMetaDataCombi sid = steps.get(i);
StepMeta stepMeta = sid.stepMeta;
StepInterface baseStep = sid.step;
baseStep.setPartitioned(stepMeta.isPartitioned());
// Now let's take a look at the source and target relation
//
// If this source step is not partitioned, and the target step is: it
// means we need to re-partition the incoming data.
// If both steps are partitioned on the same method and schema, we don't
// need to re-partition
// If both steps are partitioned on a different method or schema, we need
// to re-partition as well.
// If both steps are not partitioned, we don't need to re-partition
//
boolean isThisPartitioned = stepMeta.isPartitioned();
PartitionSchema thisPartitionSchema = null;
if (isThisPartitioned) {
thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
}
boolean isNextPartitioned = false;
StepPartitioningMeta nextStepPartitioningMeta = null;
PartitionSchema nextPartitionSchema = null;
List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
int nrNext = nextSteps.size();
for (int p = 0; p < nrNext; p++) {
StepMeta nextStep = nextSteps.get(p);
if (nextStep.isPartitioned()) {
isNextPartitioned = true;
nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
}
}
baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_NONE);
//
if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
baseStep.setRepartitioning(nextStepPartitioningMeta.getMethodType());
}
// For partitioning to a set of remove steps (repartitioning from a master
// to a set or remote output steps)
//
StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
if (targetStepPartitioningMeta != null) {
baseStep.setRepartitioning(targetStepPartitioningMeta.getMethodType());
}
}
setPreparing(false);
setInitializing(true);
//
if (isMonitored() && steps.size() < 150) {
doTopologySortOfSteps();
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
}
StepInitThread[] initThreads = new StepInitThread[steps.size()];
Thread[] threads = new Thread[steps.size()];
//
for (int i = 0; i < steps.size(); i++) {
final StepMetaDataCombi sid = steps.get(i);
// Do the init code in the background!
// Init all steps at once, but ALL steps need to finish before we can
// continue properly!
//
initThreads[i] = new StepInitThread(sid, log);
// Put it in a separate thread!
//
threads[i] = new Thread(initThreads[i]);
threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepBeforeInitialize.id, initThreads[i]);
threads[i].start();
}
for (int i = 0; i < threads.length; i++) {
try {
threads[i].join();
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepAfterInitialize.id, initThreads[i]);
} catch (Exception ex) {
log.logError("Error with init thread: " + ex.getMessage(), ex.getMessage());
log.logError(Const.getStackTracker(ex));
}
}
setInitializing(false);
boolean ok = true;
//
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
if (!initThreads[i].isOk()) {
log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));
combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
ok = false;
} else {
combi.data.setStatus(StepExecutionStatus.STATUS_IDLE);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
}
}
}
if (!ok) {
// One or more steps failed on initialization.
// Transformation is now stopped.
setStopped(true);
//
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
// Dispose will overwrite the status, but we set it back right after
// this.
combi.step.dispose(combi.meta, combi.data);
if (initThreads[i].isOk()) {
combi.data.setStatus(StepExecutionStatus.STATUS_HALTED);
} else {
combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
}
}
// Just for safety, fire the trans finished listeners...
try {
fireTransFinishedListeners();
} catch (KettleException e) {
// listeners produces errors
log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
// we will not pass this exception up to prepareExecution() entry point.
} finally {
// Flag the transformation as finished even if exception was thrown
setFinished(true);
}
//
if (preview) {
String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
} else {
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
}
}
log.snap(Metrics.METRIC_TRANSFORMATION_INIT_STOP);
KettleEnvironment.setExecutionInformation(this, repository);
setReadyToStart(true);
}
use of org.pentaho.di.core.BlockingRowSet in project pentaho-kettle by pentaho.
the class GroupByTest method testProcessRow.
@Test
public void testProcessRow() throws KettleException {
GroupByMeta groupByMeta = mock(GroupByMeta.class);
GroupByData groupByData = mock(GroupByData.class);
GroupBy groupBySpy = Mockito.spy(new GroupBy(mockHelper.stepMeta, mockHelper.stepDataInterface, 0, mockHelper.transMeta, mockHelper.trans));
doReturn(null).when(groupBySpy).getRow();
doReturn(null).when(groupBySpy).getInputRowMeta();
RowMetaInterface rowMeta = new RowMeta();
rowMeta.addValueMeta(new ValueMetaInteger("ROWNR"));
List<RowSet> outputRowSets = new ArrayList<RowSet>();
BlockingRowSet rowSet = new BlockingRowSet(1);
rowSet.putRow(rowMeta, new Object[] { new Long(0) });
outputRowSets.add(rowSet);
groupBySpy.setOutputRowSets(outputRowSets);
final String[] sub = { "b" };
doReturn(sub).when(groupByMeta).getSubjectField();
final String[] groupField = { "a" };
doReturn(groupField).when(groupByMeta).getGroupField();
final String[] aggFields = { "b_g" };
doReturn(aggFields).when(groupByMeta).getAggregateField();
final int[] aggType = { GroupByMeta.TYPE_GROUP_CONCAT_COMMA };
doReturn(aggType).when(groupByMeta).getAggregateType();
when(mockHelper.transMeta.getPrevStepFields(mockHelper.stepMeta)).thenReturn(new RowMeta());
groupBySpy.processRow(groupByMeta, groupByData);
assertTrue(groupBySpy.getOutputRowSets().get(0).isDone());
}
use of org.pentaho.di.core.BlockingRowSet in project pentaho-kettle by pentaho.
the class PrioritizeStreamsExecutionIT method getTestData.
private PrioritizeStreamsData getTestData(TestData code) {
PrioritizeStreamsData retData = new PrioritizeStreamsData();
RowSet in1 = new BlockingRowSet(4);
RowSet in2 = new BlockingRowSet(4);
RowSet in3 = new BlockingRowSet(4);
Object[][] data1 = { { "a" }, { "a" }, { "a" } };
Object[][] data2 = { { "b" }, { "b" }, { "b" } };
Object[][] data3 = { { "c" }, { "c" }, { "c" } };
RowMetaInterface rmi = new RowMeta();
Runnable stream1 = this.getInputProduser(in1, rmi, data1);
Runnable stream2 = this.getInputProduser(in2, rmi, data2);
Runnable stream3 = this.getInputProduser(in3, rmi, data3);
retData.outputRowMeta = rmi;
switch(code) {
case ABC:
{
retData.rowSets = new RowSet[] { in1, in2, in3 };
break;
}
case BAC:
{
retData.rowSets = new RowSet[] { in2, in1, in3 };
break;
}
case BCA:
{
retData.rowSets = new RowSet[] { in2, in3, in1 };
break;
}
case CAB:
{
retData.rowSets = new RowSet[] { in3, in1, in2 };
break;
}
case CBA:
{
retData.rowSets = new RowSet[] { in3, in2, in1 };
break;
}
default:
{
Assert.fail("This test data does not supported: " + code.toString());
}
}
retData.currentRowSet = retData.rowSets[0];
retData.stepnr = 0;
retData.stepnrs = 3;
service.execute(stream1);
service.execute(stream2);
service.execute(stream3);
return retData;
}
use of org.pentaho.di.core.BlockingRowSet in project pentaho-kettle by pentaho.
the class MappingOutput method setConnectorSteps.
public void setConnectorSteps(StepInterface[] targetSteps, List<MappingValueRename> inputValueRenames, List<MappingValueRename> outputValueRenames) {
for (int i = 0; i < targetSteps.length; i++) {
// OK, before we leave, make sure there is a rowset that covers the path to this target step.
// We need to create a new RowSet and add it to the Input RowSets of the target step
//
BlockingRowSet rowSet = new BlockingRowSet(getTransMeta().getSizeRowset());
// This is always a single copy, but for source and target...
//
rowSet.setThreadNameFromToCopy(getStepname(), 0, targetSteps[i].getStepname(), 0);
// Make sure to connect it to both sides...
//
addRowSetToOutputRowSets(rowSet);
// Add the row set to the target step as input.
// This will appropriately drain the buffer as data comes in.
// However, as an exception, we can't attach it to another mapping step.
// We need to attach it to the appropriate mapping input step.
// The main problem is that we can't do it here since we don't know that the other step has initialized properly
// yet.
// This method is called during init() and we can't tell for sure it's done already.
// As such, we'll simply grab the remaining row sets at the Mapping#processRow() level and assign them to a
// Mapping Input step.
//
targetSteps[i].addRowSetToInputRowSets(rowSet);
}
data.inputValueRenames = inputValueRenames;
data.outputValueRenames = outputValueRenames;
data.targetSteps = targetSteps;
}
Aggregations