use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class Trans method prepareExecution.
* Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
* and tracking the steps and hops in the transformation.
* @param arguments
* the arguments to use for this transformation
* @throws KettleException
* in case the transformation could not be prepared (initialized)
// Prepares this transformation for execution. The visible code logs the start, creates the row
// sets that connect the hop steps, instantiates one step object per step copy and wraps every
// step in a StepInitThread, then checks the init results and throws if any step failed.
// NOTE(review): this listing appears to be an excerpt — closing braces and a number of
// statements are missing (see the two-comma callExtensionPoint calls), so the bodies of
// several conditionals below cannot be seen here.
public void prepareExecution(String[] arguments) throws KettleException {
// Reset the start date.
startDate = null;
ExtensionPointHandler.callExtensionPoint(log,, this);
if (transMeta.getMetastoreLocatorOsgi() != null) {
if (arguments != null) {
// Log the dispatch start, identified by filename or by transformation name.
if (transMeta.getName() == null) {
if (transMeta.getFilename() != null) {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
} else {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
if (getArguments() != null) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
if (isSafeModeEnabled()) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
// Report whether this run is a replay (a replay date is set) or not.
if (getReplayDate() != null) {
SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
} else {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
// When servletPrintWriter is null, create one on System.out, using the encoding named by
// the KETTLE_DEFAULT_SERVLET_ENCODING system property when that property is set.
if (servletPrintWriter == null) {
String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
if (encoding == null) {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
} else {
try {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
} catch (UnsupportedEncodingException ex) {
// The configured encoding is not supported: fall back to the default-charset writer.
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
// Keep track of all the row sets and allocated steps
steps = new ArrayList<>();
rowsets = new ArrayList<>();
List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
// First pass over the hop steps: create the row sets between each step and its next steps.
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta thisStep = hopsteps.get(i);
if (thisStep.isMapping()) {
// handled and allocated by the mapping step itself.
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
int nrTargets = nextSteps.size();
for (int n = 0; n < nrTargets; n++) {
// What's the next step?
StepMeta nextStep = nextSteps.get(n);
if (nextStep.isMapping()) {
// handled and allocated by the mapping step itself.
// How many times do we start the source step?
int thisCopies = thisStep.getCopies();
// A negative copy count is rejected as an incorrectly defined step.
if (thisCopies < 0) {
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
// How many times do we start the target step?
int nextCopies = nextStep.getCopies();
// Are we re-partitioning?
boolean repartitioning;
if (thisStep.isPartitioned()) {
repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
} else {
repartitioning = nextStep.isPartitioned();
int nrCopies;
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
// Pick the dispatch type (1:1, 1:N, N:1, N:N or N:M) from the source and target copy counts;
// equal counts only count as N:N when no re-partitioning is needed.
int dispatchType;
if (thisCopies == 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_1_1;
nrCopies = 1;
} else if (thisCopies == 1 && nextCopies > 1) {
dispatchType = TYPE_DISP_1_N;
nrCopies = nextCopies;
} else if (thisCopies > 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_N_1;
nrCopies = thisCopies;
} else if (thisCopies == nextCopies && !repartitioning) {
dispatchType = TYPE_DISP_N_N;
nrCopies = nextCopies;
} else {
// > 1!
dispatchType = TYPE_DISP_N_M;
nrCopies = nextCopies;
// Every dispatch type except N:M creates one row set per copy; the row set implementation
// depends on the transformation type.
if (dispatchType != TYPE_DISP_N_M) {
for (int c = 0; c < nrCopies; c++) {
RowSet rowSet;
switch(transMeta.getTransformationType()) {
case Normal:
// This is a temporary patch until the batching rowset has proven
// to be working in all situations.
// Currently there are stalling problems when dealing with small
// amounts of rows.
Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
if (batchingRowSet != null && batchingRowSet.booleanValue()) {
rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
} else {
rowSet = new BlockingRowSet(transMeta.getSizeRowset());
case SerialSingleThreaded:
rowSet = new SingleRowRowSet();
case SingleThreaded:
rowSet = new QueueRowSet();
throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
// Name the row set after its source and target step copies, according to the dispatch type.
switch(dispatchType) {
case TYPE_DISP_1_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
case TYPE_DISP_1_N:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
case TYPE_DISP_N_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
} else {
// distribution...
// N:M: one BlockingRowSet for every (source copy, target copy) pair.
for (int s = 0; s < thisCopies; s++) {
for (int t = 0; t < nextCopies; t++) {
BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
// Second pass over the hop steps: build a StepMetaDataCombi (meta, data, step) per step copy.
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta stepMeta = hopsteps.get(i);
String stepid = stepMeta.getStepID();
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
// How many copies are launched of this step?
int nrCopies = stepMeta.getCopies();
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
// At least run once...
for (int c = 0; c < nrCopies; c++) {
// Make sure we haven't started it yet!
if (!hasStepStarted(stepMeta.getName(), c)) {
StepMetaDataCombi combi = new StepMetaDataCombi();
combi.stepname = stepMeta.getName();
combi.copy = c;
// The meta-data
combi.stepMeta = stepMeta;
combi.meta = stepMeta.getStepMetaInterface();
// Allocate the step data
StepDataInterface data = combi.meta.getStepData(); = data;
// Allocate the step
StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
// Copy the variables of the transformation to the step...
// don't share. Each copy of the step has its own variables.
// Pass the connected repository & metaStore to the steps runtime
// things as well...
if (stepMeta.isPartitioned()) {
List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
if (partitionIDs != null && partitionIDs.size() > 0) {
// Pass the partition ID
// to the step
// Save the step too
combi.step = step;
// /
if (combi.step instanceof LoggingObjectInterface) {
LogChannelInterface logChannel = combi.step.getLogChannel();
// Add to the bunch...
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
for (int s = 0; s < steps.size(); s++) {
StepMetaDataCombi combi = steps.get(s);
if (combi.stepMeta.isDoingErrorHandling()) {
// Now (optionally) write start log record!
// Make sure we synchronize appropriately to avoid duplicate batch IDs.
Object syncObject = this;
if (parentJob != null) {
// parallel execution in a job
syncObject = parentJob;
if (parentTrans != null) {
// multiple sub-transformations
syncObject = parentTrans;
synchronized (syncObject) {
for (int i = 0; i < steps.size(); i++) {
StepMetaDataCombi sid = steps.get(i);
StepMeta stepMeta = sid.stepMeta;
StepInterface baseStep = sid.step;
// Now let's take a look at the source and target relation
// If this source step is not partitioned, and the target step is: it
// means we need to re-partition the incoming data.
// If both steps are partitioned on the same method and schema, we don't
// need to re-partition
// If both steps are partitioned on a different method or schema, we need
// to re-partition as well.
// If both steps are not partitioned, we don't need to re-partition
boolean isThisPartitioned = stepMeta.isPartitioned();
PartitionSchema thisPartitionSchema = null;
if (isThisPartitioned) {
thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
boolean isNextPartitioned = false;
StepPartitioningMeta nextStepPartitioningMeta = null;
PartitionSchema nextPartitionSchema = null;
List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
int nrNext = nextSteps.size();
for (int p = 0; p < nrNext; p++) {
StepMeta nextStep = nextSteps.get(p);
if (nextStep.isPartitioned()) {
isNextPartitioned = true;
nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
// Re-partitioning applies when only the target is partitioned, or when both are partitioned
// on different schemas.
if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
// For partitioning to a set of remote steps (repartitioning from a master
// to a set of remote output steps)
StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
if (targetStepPartitioningMeta != null) {
if (isMonitored() && steps.size() < 150) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
// One StepInitThread, wrapped in its own Thread, per allocated step copy.
StepInitThread[] initThreads = new StepInitThread[steps.size()];
Thread[] threads = new Thread[steps.size()];
for (int i = 0; i < steps.size(); i++) {
final StepMetaDataCombi sid = steps.get(i);
// Do the init code in the background!
// Init all steps at once, but ALL steps need to finish before we can
// continue properly!
initThreads[i] = new StepInitThread(sid, log);
// Put it in a separate thread!
threads[i] = new Thread(initThreads[i]);
threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
ExtensionPointHandler.callExtensionPoint(log,, initThreads[i]);
for (int i = 0; i < threads.length; i++) {
try {
ExtensionPointHandler.callExtensionPoint(log,, initThreads[i]);
} catch (Exception ex) {
// NOTE(review): only ex.getMessage() is passed on (twice); the exception object itself,
// and therefore its stack trace, is not handed to the logger.
log.logError("Error with init thread: " + ex.getMessage(), ex.getMessage());
// Inspect every init thread's result; one failed step marks the whole preparation as failed.
boolean ok = true;
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
if (!initThreads[i].isOk()) {
log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));;
ok = false;
} else {;
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
if (!ok) {
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
// Dispose will overwrite the status, but we set it back right after
// this.
if (initThreads[i].isOk()) {;
} else {;
// Just for safety, fire the trans finished listeners...
try {
} catch (KettleException e) {
// listeners produces errors
log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
// we will not pass this exception up to prepareExecution() entry point.
} finally {
// Flag the transformation as finished even if exception was thrown
// In preview mode the buffered log text of this channel is appended to the exception message.
if (preview) {
String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
} else {
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
KettleEnvironment.setExecutionInformation(this, repository);
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class KettleDatabaseRepositoryStepDelegate method loadStepPartitioningMeta.
// Builds a StepPartitioningMeta for the given step id from step attributes stored in the
// repository, and lets the partitioner (when there is one) load its own settings.
public StepPartitioningMeta loadStepPartitioningMeta(ObjectId id_step) throws KettleException {
StepPartitioningMeta stepPartitioningMeta = new StepPartitioningMeta();
// The partition schema name comes from the PARTITIONING_SCHEMA step attribute.
stepPartitioningMeta.setPartitionSchemaName(repository.getStepAttributeString(id_step, "PARTITIONING_SCHEMA"));
// NOTE(review): methodCode is read but never used in the visible lines — the statement that
// applies it to stepPartitioningMeta appears to be missing from this excerpt; confirm.
String methodCode = repository.getStepAttributeString(id_step, "PARTITIONING_METHOD");
if (stepPartitioningMeta.getPartitioner() != null) {
stepPartitioningMeta.getPartitioner().loadRep(repository, id_step);
return stepPartitioningMeta;
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransMeta method loadXML.
* Parses an XML DOM (starting at the specified Node) that describes the transformation.
* @param transnode
* The XML node to load from
* @param fname
* The filename
* @param rep
* The repository to load the default list of database connections from (null if no repository is available)
* @param setInternalVariables
* true if you want to set the internal variables based on this transformation information
* @param parentVariableSpace
* the parent variable space to use during TransMeta construction
* @param prompter
* the changed/replace listener or null if there is none
* @throws KettleXMLException
* if any errors occur during parsing of the specified file
* @throws KettleMissingPluginsException
* in case missing plugins were found (details are in the exception in that case)
// Populates this transformation metadata from the given XML node: connections, notes, steps,
// error handling, hops, the info section (name, logging tables, max-date settings,
// dependencies, parameters, partition schemas, slave servers, cluster schemas, tuning flags).
// NOTE(review): this listing appears to be an excerpt — closing braces and many statements are
// missing, so several conditionals and loops below have no visible body.
public void loadXML(Node transnode, String fname, IMetaStore metaStore, Repository rep, boolean setInternalVariables, VariableSpace parentVariableSpace, OverwritePrompter prompter) throws KettleXMLException, KettleMissingPluginsException {
// Created up front; it is thrown at the end only when its detail list is not empty.
KettleMissingPluginsException missingPluginsException = new KettleMissingPluginsException(BaseMessages.getString(PKG, "TransMeta.MissingPluginsFoundWhileLoadingTransformation.Exception"));
// Remember this as the primary meta store.
this.metaStore = metaStore;
try {
Props props = null;
if (Props.isInitialized()) {
props = Props.getInstance();
try {
// Clear the transformation
// Set the filename here so it can be used in variables for ALL aspects of the transformation FIX: PDI-8890
if (null == rep) {
} else {
// Set the repository here so it can be used in variables for ALL aspects of the job FIX: PDI-16441
try {
// Shared objects come from the repository when one is given, otherwise from readSharedObjects().
sharedObjectsFile = XMLHandler.getTagValue(transnode, "info", "shared_objects_file");
sharedObjects = rep != null ? rep.readTransSharedObjects(this) : readSharedObjects();
} catch (Exception e) {
// A failure to read shared objects is logged, not rethrown, in the visible code.
log.logError(BaseMessages.getString(PKG, "TransMeta.ErrorReadingSharedObjects.Message", e.toString()));
// Load the database connections, slave servers, cluster schemas & partition schemas into this object.
// Handle connections
int n = XMLHandler.countNodes(transnode, DatabaseMeta.XML_TAG);
Set<String> privateTransformationDatabases = new HashSet<>(n);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveConnections", String.valueOf(n)));
for (int i = 0; i < n; i++) {
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtConnection") + i);
Node nodecon = XMLHandler.getSubNodeByNr(transnode, DatabaseMeta.XML_TAG, i);
DatabaseMeta dbcon = new DatabaseMeta(nodecon);
if (!dbcon.isShared()) {
DatabaseMeta exist = findDatabase(dbcon.getName());
if (exist == null) {
} else {
if (!exist.isShared()) {
// otherwise, we just keep the shared connection.
// An existing non-shared connection is replaced in place (same index) if overwriting is allowed.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnectionYN", dbcon.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
int idx = indexOfDatabase(exist);
addDatabase(idx, dbcon);
// Read the notes...
Node notepadsnode = XMLHandler.getSubNode(transnode, XML_TAG_NOTEPADS);
int nrnotes = XMLHandler.countNodes(notepadsnode, NotePadMeta.XML_TAG);
for (int i = 0; i < nrnotes; i++) {
Node notepadnode = XMLHandler.getSubNodeByNr(notepadsnode, NotePadMeta.XML_TAG, i);
NotePadMeta ni = new NotePadMeta(notepadnode);
// Handle Steps
int s = XMLHandler.countNodes(transnode, StepMeta.XML_TAG);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.ReadingSteps") + s + " steps...");
for (int i = 0; i < s; i++) {
Node stepnode = XMLHandler.getSubNodeByNr(transnode, StepMeta.XML_TAG, i);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtStep") + i);
StepMeta stepMeta = new StepMeta(stepnode, databases, metaStore);
// for tracing, retain hierarchy
if (stepMeta.isMissing()) {
addMissingTrans((MissingTrans) stepMeta.getStepMetaInterface());
// Check if the step exists and if it's a shared step.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
StepMeta check = findStep(stepMeta.getName());
if (check != null) {
if (!check.isShared()) {
// Don't overwrite shared objects
} else {
// Just keep the drawn flag and location
} else {
// simply add it.
// Read the error handling code of the steps...
Node errorHandlingNode = XMLHandler.getSubNode(transnode, XML_TAG_STEP_ERROR_HANDLING);
int nrErrorHandlers = XMLHandler.countNodes(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG);
for (int i = 0; i < nrErrorHandlers; i++) {
Node stepErrorMetaNode = XMLHandler.getSubNodeByNr(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG, i);
StepErrorMeta stepErrorMeta = new StepErrorMeta(this, stepErrorMetaNode, steps);
if (stepErrorMeta.getSourceStep() != null) {
// a bit of a trick, I know.
for (int i = 0; i < nrSteps(); i++) {
StepMeta stepMeta = getStep(i);
StepMetaInterface sii = stepMeta.getStepMetaInterface();
if (sii != null) {
// Handle Hops
Node ordernode = XMLHandler.getSubNode(transnode, XML_TAG_ORDER);
n = XMLHandler.countNodes(ordernode, TransHopMeta.XML_HOP_TAG);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveHops") + n + " hops...");
for (int i = 0; i < n; i++) {
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtHop") + i);
Node hopnode = XMLHandler.getSubNodeByNr(ordernode, TransHopMeta.XML_HOP_TAG, i);
TransHopMeta hopinf = new TransHopMeta(hopnode, steps);
// Mark the hop as an error hop based on the error-handling section read above.
hopinf.setErrorHop(isErrorNode(errorHandlingNode, hopnode));
// get transformation info:
Node infonode = XMLHandler.getSubNode(transnode, XML_TAG_INFO);
// Name
setName(XMLHandler.getTagValue(infonode, "name"));
// description
description = XMLHandler.getTagValue(infonode, "description");
// extended description
extendedDescription = XMLHandler.getTagValue(infonode, "extended_description");
// trans version
trans_version = XMLHandler.getTagValue(infonode, "trans_version");
// trans status
trans_status = Const.toInt(XMLHandler.getTagValue(infonode, "trans_status"), -1);
// transformation type, resolved from its code
String transTypeCode = XMLHandler.getTagValue(infonode, "trans_type");
transformationType = TransformationType.getTransformationTypeByCode(transTypeCode);
// With a repository, resolve the directory named in the XML.
if (rep != null) {
String directoryPath = XMLHandler.getTagValue(infonode, "directory");
if (directoryPath != null) {
directory = rep.findDirectory(directoryPath);
if (directory == null) {
// not found
// The root as default
directory = new RepositoryDirectory();
// Read logging table information
Node logNode = XMLHandler.getSubNode(infonode, "log");
if (logNode != null) {
// Backward compatibility...
Node transLogNode = XMLHandler.getSubNode(logNode, TransLogTable.XML_TAG);
// No dedicated trans-log-table node: read the individual tags under "log" instead.
if (transLogNode == null) {
// Load the XML
transLogTable.findField(TransLogTable.ID.LINES_READ).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "read")));
transLogTable.findField(TransLogTable.ID.LINES_WRITTEN).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "write")));
transLogTable.findField(TransLogTable.ID.LINES_INPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "input")));
transLogTable.findField(TransLogTable.ID.LINES_OUTPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "output")));
transLogTable.findField(TransLogTable.ID.LINES_UPDATED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "update")));
transLogTable.findField(TransLogTable.ID.LINES_REJECTED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "rejected")));
transLogTable.setConnectionName(XMLHandler.getTagValue(infonode, "log", "connection"));
transLogTable.setSchemaName(XMLHandler.getTagValue(infonode, "log", "schema"));
transLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "table"));
transLogTable.findField(TransLogTable.ID.ID_BATCH).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "use_batchid")));
transLogTable.findField(TransLogTable.ID.LOG_FIELD).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "USE_LOGFIELD")));
transLogTable.setLogSizeLimit(XMLHandler.getTagValue(infonode, "log", "size_limit_lines"));
transLogTable.setLogInterval(XMLHandler.getTagValue(infonode, "log", "interval"));
performanceLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "step_performance_table"));
} else {
transLogTable.loadXML(transLogNode, databases, steps);
// The remaining log tables are each loaded only when their node is present.
Node perfLogNode = XMLHandler.getSubNode(logNode, PerformanceLogTable.XML_TAG);
if (perfLogNode != null) {
performanceLogTable.loadXML(perfLogNode, databases, steps);
Node channelLogNode = XMLHandler.getSubNode(logNode, ChannelLogTable.XML_TAG);
if (channelLogNode != null) {
channelLogTable.loadXML(channelLogNode, databases, steps);
Node stepLogNode = XMLHandler.getSubNode(logNode, StepLogTable.XML_TAG);
if (stepLogNode != null) {
stepLogTable.loadXML(stepLogNode, databases, steps);
Node metricsLogNode = XMLHandler.getSubNode(logNode, MetricsLogTable.XML_TAG);
if (metricsLogNode != null) {
metricsLogTable.loadXML(metricsLogNode, databases, steps);
// Maxdate range options...
String maxdatcon = XMLHandler.getTagValue(infonode, "maxdate", "connection");
maxDateConnection = findDatabase(maxdatcon);
maxDateTable = XMLHandler.getTagValue(infonode, "maxdate", "table");
maxDateField = XMLHandler.getTagValue(infonode, "maxdate", "field");
String offset = XMLHandler.getTagValue(infonode, "maxdate", "offset");
maxDateOffset = Const.toDouble(offset, 0.0);
String mdiff = XMLHandler.getTagValue(infonode, "maxdate", "maxdiff");
maxDateDifference = Const.toDouble(mdiff, 0.0);
// Check the dependencies as far as dates are concerned...
// We calculate BEFORE we run the MAX of these dates
// If the date is larger than enddate, startdate is set to MIN_DATE
Node depsNode = XMLHandler.getSubNode(infonode, XML_TAG_DEPENDENCIES);
int nrDeps = XMLHandler.countNodes(depsNode, TransDependency.XML_TAG);
for (int i = 0; i < nrDeps; i++) {
Node depNode = XMLHandler.getSubNodeByNr(depsNode, TransDependency.XML_TAG, i);
TransDependency transDependency = new TransDependency(depNode, databases);
if (transDependency.getDatabase() != null && transDependency.getFieldname() != null) {
// Read the named parameters.
Node paramsNode = XMLHandler.getSubNode(infonode, XML_TAG_PARAMETERS);
int nrParams = XMLHandler.countNodes(paramsNode, "parameter");
for (int i = 0; i < nrParams; i++) {
Node paramNode = XMLHandler.getSubNodeByNr(paramsNode, "parameter", i);
String paramName = XMLHandler.getTagValue(paramNode, "name");
String defaultValue = XMLHandler.getTagValue(paramNode, "default_value");
String descr = XMLHandler.getTagValue(paramNode, "description");
addParameterDefinition(paramName, defaultValue, descr);
// Read the partitioning schemas
Node partSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_PARTITIONSCHEMAS);
int nrPartSchemas = XMLHandler.countNodes(partSchemasNode, PartitionSchema.XML_TAG);
for (int i = 0; i < nrPartSchemas; i++) {
Node partSchemaNode = XMLHandler.getSubNodeByNr(partSchemasNode, PartitionSchema.XML_TAG, i);
PartitionSchema partitionSchema = new PartitionSchema(partSchemaNode);
// Check if the partition schema exists and if it's a shared one.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
PartitionSchema check = findPartitionSchema(partitionSchema.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwritePartitionSchemaYN", partitionSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
} else {
// Visit the partitioning metadata (and target partitioning metadata) of every step.
for (int i = 0; i < nrSteps(); i++) {
StepPartitioningMeta stepPartitioningMeta = getStep(i).getStepPartitioningMeta();
if (stepPartitioningMeta != null) {
StepPartitioningMeta targetStepPartitioningMeta = getStep(i).getTargetStepPartitioningMeta();
if (targetStepPartitioningMeta != null) {
// Read the slave servers...
Node slaveServersNode = XMLHandler.getSubNode(infonode, XML_TAG_SLAVESERVERS);
int nrSlaveServers = XMLHandler.countNodes(slaveServersNode, SlaveServer.XML_TAG);
for (int i = 0; i < nrSlaveServers; i++) {
Node slaveServerNode = XMLHandler.getSubNodeByNr(slaveServersNode, SlaveServer.XML_TAG, i);
SlaveServer slaveServer = new SlaveServer(slaveServerNode);
if (slaveServer.getName() == null) {
log.logError(BaseMessages.getString(PKG, "TransMeta.Log.WarningWhileCreationSlaveServer", slaveServer.getName()));
// Check if the object exists and if it's a shared object.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
SlaveServer check = findSlaveServer(slaveServer.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteSlaveServerYN", slaveServer.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
} else {
// Read the cluster schemas
Node clusterSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_CLUSTERSCHEMAS);
int nrClusterSchemas = XMLHandler.countNodes(clusterSchemasNode, ClusterSchema.XML_TAG);
for (int i = 0; i < nrClusterSchemas; i++) {
Node clusterSchemaNode = XMLHandler.getSubNodeByNr(clusterSchemasNode, ClusterSchema.XML_TAG, i);
ClusterSchema clusterSchema = new ClusterSchema(clusterSchemaNode, slaveServers);
// Check if the object exists and if it's a shared object.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
ClusterSchema check = findClusterSchema(clusterSchema.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteClusterSchemaYN", clusterSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
} else {
for (int i = 0; i < nrSteps(); i++) {
// Row set size and timing settings; each falls back to a Const default when the tag
// cannot be converted to an int.
String srowset = XMLHandler.getTagValue(infonode, "size_rowset");
sizeRowset = Const.toInt(srowset, Const.ROWS_IN_ROWSET);
sleepTimeEmpty = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_empty"), Const.TIMEOUT_GET_MILLIS);
sleepTimeFull = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_full"), Const.TIMEOUT_PUT_MILLIS);
// "Y"-style flags are true only for "Y"; the !"N" flags are true unless the tag is "N".
usingUniqueConnections = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "unique_connections"));
feedbackShown = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "feedback_shown"));
feedbackSize = Const.toInt(XMLHandler.getTagValue(infonode, "feedback_size"), Const.ROWS_UPDATE);
usingThreadPriorityManagment = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "using_thread_priorities"));
// Performance monitoring for steps...
capturingStepPerformanceSnapShots = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "capture_step_performance"));
stepPerformanceCapturingDelay = Const.toLong(XMLHandler.getTagValue(infonode, "step_performance_capturing_delay"), 1000);
stepPerformanceCapturingSizeLimit = XMLHandler.getTagValue(infonode, "step_performance_capturing_size_limit");
// Created user/date
createdUser = XMLHandler.getTagValue(infonode, "created_user");
String createDate = XMLHandler.getTagValue(infonode, "created_date");
if (createDate != null) {
createdDate = XMLHandler.stringToDate(createDate);
// Changed user/date
modifiedUser = XMLHandler.getTagValue(infonode, "modified_user");
String modDate = XMLHandler.getTagValue(infonode, "modified_date");
if (modDate != null) {
modifiedDate = XMLHandler.stringToDate(modDate);
Node partitionDistNode = XMLHandler.getSubNode(transnode, SlaveStepCopyPartitionDistribution.XML_TAG);
if (partitionDistNode != null) {
slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution(partitionDistNode);
} else {
// leave empty
slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution();
// Is this a slave transformation?
slaveTransformation = "Y".equalsIgnoreCase(XMLHandler.getTagValue(transnode, "slave_transformation"));
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfStepsReaded") + nrSteps());
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfHopsReaded") + nrTransHops());
// Load the attribute groups map
attributesMap = AttributesUtil.loadAttributes(XMLHandler.getSubNode(transnode, AttributesUtil.XML_TAG));
keyForSessionKey = XMLHandler.stringToBinary(XMLHandler.getTagValue(infonode, "key_for_session_key"));
// Note: this flag uses a case-sensitive "Y" comparison, unlike the flags above.
isKeyPrivate = "Y".equals(XMLHandler.getTagValue(infonode, "is_key_private"));
} catch (KettleXMLException xe) {
throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), xe);
} catch (KettleException e) {
throw new KettleXMLException(e);
} finally {
if (setInternalVariables) {
ExtensionPointHandler.callExtensionPoint(log,, this);
} catch (Exception e) {
// Missing plugins take precedence over the generic read error.
if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
throw missingPluginsException;
} else {
throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), e);
} finally {
// NOTE(review): throwing from a finally block discards any exception already in flight.
if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
throw missingPluginsException;
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransSplitter method generateSlavePartitionSchemas.
* We want to divide the available partitions over the slaves. Let's create a hashtable that contains the partition
* schemas. Since we can only use a single cluster, we can divide them all over a single set of slave servers.
* @throws KettleException
// Rebuilds slaveServerPartitionsMap: for every reference step with partitioning metadata and a
// cluster schema, walks the partition IDs of a cloned partition schema and records them per
// slave server, cycling through the cluster's slave servers.
// NOTE(review): this listing appears to be an excerpt — closing braces and several statements
// (for example the bodies of the guard conditions) are not visible here.
private void generateSlavePartitionSchemas() throws KettleException {
slaveServerPartitionsMap = new Hashtable<SlaveServer, Map<PartitionSchema, List<String>>>();
for (int i = 0; i < referenceSteps.length; i++) {
StepMeta stepMeta = referenceSteps[i];
StepPartitioningMeta stepPartitioningMeta = stepMeta.getStepPartitioningMeta();
// NOTE(review): the body of this null guard is not visible; as shown, the next line would
// dereference stepPartitioningMeta right after it was found to be null — confirm the guard
// skips the step.
if (stepPartitioningMeta == null) {
if (stepPartitioningMeta.getMethodType() == StepPartitioningMeta.PARTITIONING_METHOD_NONE) {
ClusterSchema clusterSchema = stepMeta.getClusterSchema();
if (clusterSchema == null) {
// Make a copy of the partition schema because we might change the object.
// Let's not alter the original transformation.
// The match is done on name, and the name is preserved in this case, so it should be safe to do so.
// Also, all cloned steps re-match with the cloned schema name afterwards...
PartitionSchema partitionSchema = (PartitionSchema) stepPartitioningMeta.getPartitionSchema().clone();
int nrSlaves = clusterSchema.findNrSlaves();
if (nrSlaves == 0) {
// no slaves: ignore this situation too
// A dynamically defined schema is expanded for the number of slaves before it is used.
if (partitionSchema.isDynamicallyDefined()) {
partitionSchema.expandPartitionsDynamically(nrSlaves, originalTransformation);
int nrPartitions = partitionSchema.getPartitionIDs().size();
// Fewer partitions than slave servers is rejected.
if (nrPartitions < nrSlaves) {
throw new KettleException("It doesn't make sense to have a partitioned, clustered step with less partitions (" + nrPartitions + ") than that there are slave servers (" + nrSlaves + ")");
// slaveServerNr is the index into slaveServers of the server that receives the next partition.
int slaveServerNr = 0;
List<SlaveServer> slaveServers = clusterSchema.getSlaveServers();
for (int p = 0; p < nrPartitions; p++) {
String partitionId = partitionSchema.getPartitionIDs().get(p);
SlaveServer slaveServer = slaveServers.get(slaveServerNr);
// A master server is handled specially; the index wraps to 0 past the end of the list.
// NOTE(review): the statement that advances slaveServerNr is not visible in this excerpt.
if (slaveServer.isMaster()) {
if (slaveServerNr >= slaveServers.size()) {
// re-start
slaveServerNr = 0;
slaveServer = slaveServers.get(slaveServerNr);
Map<PartitionSchema, List<String>> schemaPartitionsMap = slaveServerPartitionsMap.get(slaveServer);
if (schemaPartitionsMap == null) {
// Add the schema-partitions map to the slave server
schemaPartitionsMap = new HashMap<PartitionSchema, List<String>>();
slaveServerPartitionsMap.put(slaveServer, schemaPartitionsMap);
// See if we find a list of partitions
List<String> partitions = schemaPartitionsMap.get(partitionSchema);
if (partitions == null) {
partitions = new ArrayList<String>();
schemaPartitionsMap.put(partitionSchema, partitions);
// Only partition IDs not already in the list are considered.
if (partitions.indexOf(partitionId) < 0) {
// Switch to next slave.
if (slaveServerNr >= clusterSchema.getSlaveServers().size()) {
// re-start
slaveServerNr = 0;
// System.out.println("We have "+(slaveServerPartitionsMap.size())+" entries in the slave server partitions map");
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransSplitter method addSlaveCopy.
* Create a copy of a step from the original transformation for use in a slave transformation. If the step is
* partitioned, the partitioning will be changed to "schemaName (slave)"
* @param stepMeta
* The step to copy / clone.
* @return a copy of the specified step for use in a slave transformation.
// Clones the given step and returns the clone. For a partitioned clone, the slave-side
// partition schema is looked up in transMeta by the name derived from the step's schema name.
// NOTE(review): this listing appears to be an excerpt — the statements that the trailing
// comments describe (applying the slave schema, copy count, clustering info) and the closing
// braces are not visible here; the use of slaveServer is not visible either.
private StepMeta addSlaveCopy(TransMeta transMeta, StepMeta stepMeta, SlaveServer slaveServer) {
// Work on a clone so the caller's step is not modified by this method's visible code.
StepMeta copy = (StepMeta) stepMeta.clone();
if (copy.isPartitioned()) {
StepPartitioningMeta stepPartitioningMeta = copy.getStepPartitioningMeta();
PartitionSchema partitionSchema = stepPartitioningMeta.getPartitionSchema();
String slavePartitionSchemaName = createSlavePartitionSchemaName(partitionSchema.getName());
PartitionSchema slaveSchema = transMeta.findPartitionSchema(slavePartitionSchemaName);
if (slaveSchema != null) {
// Always just start a single copy on the slave server...
// Otherwise the confusion w.r.t. partitioning & re-partitioning would be complete.
// Remove the clustering information on the slave transformation step
// We don't need it anymore, it only confuses.
return copy;