Use of org.pentaho.di.core.BlockingBatchingRowSet in project pentaho-kettle by pentaho.
The class Trans, method prepareExecution.
/**
 * Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
 * and tracking the steps and hops in the transformation.
 *
 * <p>The work is done in the following order:
 * <ol>
 * <li>flag the transformation as "preparing", fire the prepare-execution extension point and activate parameters;</li>
 * <li>allocate one row set per (source copy, target copy) connection between hop steps;</li>
 * <li>allocate a step instance plus step data for every copy of every hop step;</li>
 * <li>compute the batch id / date range and begin processing, synchronized on the parent job or parent
 * transformation when there is one;</li>
 * <li>decide for every step whether it has to re-partition its output;</li>
 * <li>initialize all steps in parallel background threads and wait for all of them;</li>
 * <li>if any step failed to initialize: dispose all steps, fire the finished listeners and throw.</li>
 * </ol>
 *
 * @param arguments the arguments to use for this transformation; when {@code null} the arguments are left as they are
 * @throws KettleException in case the transformation could not be prepared (initialized), for example when a step
 *                         reports a negative number of copies, the transformation type is not handled, or at least
 *                         one step failed to initialize
 */
public void prepareExecution(String[] arguments) throws KettleException {
  setPreparing(true);
  startDate = null;
  setRunning(false);
  log.snap(Metrics.METRIC_TRANSFORMATION_EXECUTION_START);
  log.snap(Metrics.METRIC_TRANSFORMATION_INIT_START);
  ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationPrepareExecution.id, this);
  checkCompatibility();

  // Only overwrite the arguments when the caller actually supplied some.
  if (arguments != null) {
    setArguments(arguments);
  }
  if (parentTrans != null) {
    IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentTrans, log));
  } else {
    // If there is no parent, one of these still needs to be called to instantiate a new cache
    IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentJob, log));
  }
  activateParameters();
  transMeta.activateParameters();
  ConnectionUtil.init(transMeta);

  // Announce what is being dispatched: by name when there is one, otherwise by filename (if any).
  if (transMeta.getName() == null) {
    if (transMeta.getFilename() != null) {
      log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
    }
  } else {
    log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
  }
  if (getArguments() != null) {
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
    }
  }
  if (isSafeModeEnabled()) {
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
    }
  }
  if (getReplayDate() != null) {
    SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
    log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
  } else {
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
    }
  }

  // Make sure there is always a print writer available: fall back to System.out, using the encoding from the
  // KETTLE_DEFAULT_SERVLET_ENCODING system property when it is set and supported.
  if (servletPrintWriter == null) {
    String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
    if (encoding == null) {
      servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
    } else {
      try {
        servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
      } catch (UnsupportedEncodingException ex) {
        // Unknown encoding name: use the platform default instead of failing the preparation.
        servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
      }
    }
  }

  // Keep track of all the row sets and allocated steps
  //
  steps = new ArrayList<>();
  rowsets = new ArrayList<>();
  List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
  if (log.isDetailed()) {
    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
  }

  // Phase 1: allocate the row sets that connect every source step to each of its target steps.
  for (int i = 0; i < hopsteps.size(); i++) {
    StepMeta thisStep = hopsteps.get(i);
    if (thisStep.isMapping()) {
      // handled and allocated by the mapping step itself.
      continue;
    }
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
    }
    List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
    int nrTargets = nextSteps.size();
    for (int n = 0; n < nrTargets; n++) {
      // What's the next step?
      StepMeta nextStep = nextSteps.get(n);
      if (nextStep.isMapping()) {
        // handled and allocated by the mapping step itself.
        continue;
      }
      // How many times do we start the source step?
      int thisCopies = thisStep.getCopies();
      if (thisCopies < 0) {
        throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
      }
      // How many times do we start the target step?
      int nextCopies = nextStep.getCopies();
      // Are we re-partitioning? A partitioned source re-partitions when the target's partitioning meta differs;
      // an unpartitioned source re-partitions whenever the target is partitioned.
      boolean repartitioning;
      if (thisStep.isPartitioned()) {
        repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
      } else {
        repartitioning = nextStep.isPartitioned();
      }
      int nrCopies;
      if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
      }
      // Pick the dispatch type from the number of source and target copies. nrCopies is the number of row sets
      // to create for every dispatch type except N:M, which allocates thisCopies x nextCopies row sets below.
      int dispatchType;
      if (thisCopies == 1 && nextCopies == 1) {
        dispatchType = TYPE_DISP_1_1;
        nrCopies = 1;
      } else if (thisCopies == 1 && nextCopies > 1) {
        dispatchType = TYPE_DISP_1_N;
        nrCopies = nextCopies;
      } else if (thisCopies > 1 && nextCopies == 1) {
        dispatchType = TYPE_DISP_N_1;
        nrCopies = thisCopies;
      } else if (thisCopies == nextCopies && !repartitioning) {
        dispatchType = TYPE_DISP_N_N;
        nrCopies = nextCopies;
      } else {
        // > 1!
        dispatchType = TYPE_DISP_N_M;
        nrCopies = nextCopies;
      }

      if (dispatchType != TYPE_DISP_N_M) {
        for (int c = 0; c < nrCopies; c++) {
          // The row set implementation depends on the transformation type.
          RowSet rowSet;
          switch (transMeta.getTransformationType()) {
            case Normal:
              // This is a temporary patch until the batching rowset has proven
              // to be working in all situations.
              // Currently there are stalling problems when dealing with small
              // amounts of rows.
              //
              Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
              if (batchingRowSet != null && batchingRowSet.booleanValue()) {
                rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
              } else {
                rowSet = new BlockingRowSet(transMeta.getSizeRowset());
              }
              break;
            case SerialSingleThreaded:
              rowSet = new SingleRowRowSet();
              break;
            case SingleThreaded:
              rowSet = new QueueRowSet();
              break;
            default:
              throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
          }
          // Record which source copy feeds which target copy; the copy that varies is the loop index c.
          switch (dispatchType) {
            case TYPE_DISP_1_1:
              rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
              break;
            case TYPE_DISP_1_N:
              rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
              break;
            case TYPE_DISP_N_1:
              rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
              break;
            case TYPE_DISP_N_N:
              rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
              break;
            default:
              break;
          }
          rowsets.add(rowSet);
          if (log.isDetailed()) {
            log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
          }
        }
      } else {
        // For each N source step copies we have M target step copies: every source copy gets its own row set
        // to every target copy.
        // NOTE(review): this branch always uses BlockingRowSet, regardless of the transformation type that the
        // branch above switches on - confirm this is intended.
        for (int s = 0; s < thisCopies; s++) {
          for (int t = 0; t < nextCopies; t++) {
            BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
            rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
            rowsets.add(rowSet);
            if (log.isDetailed()) {
              log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
            }
          }
        }
      }
    }
    // Guarded like every other detailed message in this method, so the text is only built when it is needed.
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
    }
  }
  if (log.isDetailed()) {
    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
  }

  // Phase 2: allocate a step instance and its step data for every copy of every hop step.
  for (int i = 0; i < hopsteps.size(); i++) {
    StepMeta stepMeta = hopsteps.get(i);
    String stepid = stepMeta.getStepID();
    if (log.isDetailed()) {
      log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
    }
    // How many copies are launched of this step?
    int nrCopies = stepMeta.getCopies();
    if (log.isDebug()) {
      log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
    }
    for (int c = 0; c < nrCopies; c++) {
      // Make sure we haven't started it yet!
      if (!hasStepStarted(stepMeta.getName(), c)) {
        StepMetaDataCombi combi = new StepMetaDataCombi();
        combi.stepname = stepMeta.getName();
        combi.copy = c;
        // The meta-data
        combi.stepMeta = stepMeta;
        combi.meta = stepMeta.getStepMetaInterface();
        // Allocate the step data
        StepDataInterface data = combi.meta.getStepData();
        combi.data = data;
        // Allocate the step
        StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
        // Copy the variables of the transformation to the step...
        // don't share. Each copy of the step has its own variables.
        //
        step.initializeVariablesFrom(this);
        step.setUsingThreadPriorityManagment(transMeta.isUsingThreadPriorityManagment());
        // Pass the connected repository & metaStore to the steps runtime
        //
        step.setRepository(repository);
        step.setMetaStore(metaStore);
        // For a partitioned step, hand copy c the partition ID at index c of the schema's partition IDs.
        if (stepMeta.isPartitioned()) {
          List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
          if (partitionIDs != null && !partitionIDs.isEmpty()) {
            step.setPartitionID(partitionIDs.get(c));
          }
        }
        // Save the step too
        combi.step = step;
        // Propagate the transformation's log level and metrics gathering to the step's log channel.
        if (combi.step instanceof LoggingObjectInterface) {
          LogChannelInterface logChannel = combi.step.getLogChannel();
          logChannel.setLogLevel(logLevel);
          logChannel.setGatheringMetrics(log.isGatheringMetrics());
        }
        // Add to the bunch...
        steps.add(combi);
        if (log.isDetailed()) {
          log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
        }
      }
    }
  }

  // Let the steps that do error handling identify their error output.
  for (int s = 0; s < steps.size(); s++) {
    StepMetaDataCombi combi = steps.get(s);
    if (combi.stepMeta.isDoingErrorHandling()) {
      combi.step.identifyErrorOutput();
    }
  }

  // Now (optionally) write start log record!
  // Make sure we synchronize appropriately to avoid duplicate batch IDs.
  // The lock is this transformation, or the parent job when there is one; a parent transformation, when
  // present, takes precedence over both.
  //
  Object syncObject = this;
  if (parentJob != null) {
    // parallel execution in a job
    syncObject = parentJob;
  }
  if (parentTrans != null) {
    // multiple sub-transformations
    syncObject = parentTrans;
  }
  synchronized (syncObject) {
    calculateBatchIdAndDateRange();
    beginProcessing();
  }

  // Phase 3: tell every step whether it is partitioned and whether it has to re-partition its output.
  for (int i = 0; i < steps.size(); i++) {
    StepMetaDataCombi sid = steps.get(i);
    StepMeta stepMeta = sid.stepMeta;
    StepInterface baseStep = sid.step;
    baseStep.setPartitioned(stepMeta.isPartitioned());
    // Now let's take a look at the source and target relation
    //
    // If this source step is not partitioned, and the target step is: it
    // means we need to re-partition the incoming data.
    // If both steps are partitioned on the same method and schema, we don't
    // need to re-partition
    // If both steps are partitioned on a different method or schema, we need
    // to re-partition as well.
    // If both steps are not partitioned, we don't need to re-partition
    //
    boolean isThisPartitioned = stepMeta.isPartitioned();
    PartitionSchema thisPartitionSchema = null;
    if (isThisPartitioned) {
      thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
    }
    boolean isNextPartitioned = false;
    StepPartitioningMeta nextStepPartitioningMeta = null;
    PartitionSchema nextPartitionSchema = null;
    List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
    int nrNext = nextSteps.size();
    // When several target steps are partitioned, the last one found determines the partitioning meta used below.
    for (int p = 0; p < nrNext; p++) {
      StepMeta nextStep = nextSteps.get(p);
      if (nextStep.isPartitioned()) {
        isNextPartitioned = true;
        nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
        nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
      }
    }
    // Default: no re-partitioning, unless one of the conditions below applies.
    baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_NONE);
    if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
      baseStep.setRepartitioning(nextStepPartitioningMeta.getMethodType());
    }
    // For partitioning to a set of remote steps (repartitioning from a master
    // to a set of remote output steps)
    //
    StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
    if (targetStepPartitioningMeta != null) {
      baseStep.setRepartitioning(targetStepPartitioningMeta.getMethodType());
    }
  }
  setPreparing(false);
  setInitializing(true);

  // The topology sort is only done for monitored transformations with fewer than 150 step copies.
  if (isMonitored() && steps.size() < 150) {
    doTopologySortOfSteps();
  }
  if (log.isDetailed()) {
    log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
  }

  // Phase 4: initialize all steps, each one in its own background thread.
  StepInitThread[] initThreads = new StepInitThread[steps.size()];
  Thread[] threads = new Thread[steps.size()];
  for (int i = 0; i < steps.size(); i++) {
    final StepMetaDataCombi sid = steps.get(i);
    // Do the init code in the background!
    // Init all steps at once, but ALL steps need to finish before we can
    // continue properly!
    //
    initThreads[i] = new StepInitThread(sid, log);
    // Put it in a separate thread!
    //
    threads[i] = new Thread(initThreads[i]);
    threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
    ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepBeforeInitialize.id, initThreads[i]);
    threads[i].start();
  }
  // Wait for every init thread and fire the after-initialize extension point for it.
  for (int i = 0; i < threads.length; i++) {
    try {
      threads[i].join();
      ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepAfterInitialize.id, initThreads[i]);
    } catch (Exception ex) {
      // Log the failure once with its message, followed by the stack trace, and keep waiting for the
      // remaining threads.
      // NOTE(review): an InterruptedException thrown by join() also ends up here; the interrupt status is
      // not restored and that init thread may still be running - confirm whether this is acceptable.
      log.logError("Error with init thread: " + ex.getMessage());
      log.logError(Const.getStackTracker(ex));
    }
  }
  setInitializing(false);

  // Collect the result of every init thread and set the matching step status.
  boolean ok = true;
  for (int i = 0; i < initThreads.length; i++) {
    StepMetaDataCombi combi = initThreads[i].getCombi();
    if (!initThreads[i].isOk()) {
      log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));
      combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
      ok = false;
    } else {
      combi.data.setStatus(StepExecutionStatus.STATUS_IDLE);
      if (log.isDetailed()) {
        log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
      }
    }
  }
  if (!ok) {
    // One or more steps failed on initialization.
    // Transformation is now stopped.
    setStopped(true);

    // Dispose every step, including the ones that initialized fine.
    for (int i = 0; i < initThreads.length; i++) {
      StepMetaDataCombi combi = initThreads[i].getCombi();
      // Dispose will overwrite the status, but we set it back right after
      // this.
      combi.step.dispose(combi.meta, combi.data);
      if (initThreads[i].isOk()) {
        combi.data.setStatus(StepExecutionStatus.STATUS_HALTED);
      } else {
        combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
      }
    }
    // Just for safety, fire the trans finished listeners...
    try {
      fireTransFinishedListeners();
    } catch (KettleException e) {
      // listeners produces errors
      log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
      // we will not pass this exception up to prepareExecution() entry point.
    } finally {
      // Flag the transformation as finished even if exception was thrown
      setFinished(true);
    }

    // In preview mode the buffered log text of this transformation's log channel is appended to the exception
    // message.
    if (preview) {
      String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
      throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
    } else {
      throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
    }
  }
  log.snap(Metrics.METRIC_TRANSFORMATION_INIT_STOP);
  KettleEnvironment.setExecutionInformation(this, repository);
  setReadyToStart(true);
}
Aggregations