use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransMeta method loadXML.
/**
* Parses an XML DOM (starting at the specified Node) that describes the transformation and fills this object
* from it. In order, it reads: shared objects, database connections, notes, steps, step error handling, hops,
* the general "info" settings, logging tables, max-date settings, dependencies, named parameters, partition
* schemas, slave servers, cluster schemas and the remaining miscellaneous settings.
*
* The transformation is cleared before loading. When an object with the same name is already present and is
* flagged as shared, the already present version is kept instead of the one stored in the XML.
*
* @param transnode
* The XML node to load from
* @param fname
* The filename; it is only set on this object when no repository is supplied
* @param metaStore
* the metastore to remember as the primary meta store of this transformation
* @param rep
* The repository (null if no repository is available). When not null it is set on this object, used to
* read the shared objects and used to look up the directory of the transformation
* @param setInternalVariables
* true if you want to set the internal variables based on this transformation information
* @param parentVariableSpace
* the parent variable space to use during TransMeta construction
* @param prompter
* the changed/replace listener or null if there is none; it is handed to shouldOverwrite() whenever an
* existing, non-shared object would be replaced
* @throws KettleXMLException
* if any errors occur during parsing of the specified file
* @throws KettleMissingPluginsException
* in case missing plugins were found (details are in the exception in that case)
*/
public void loadXML(Node transnode, String fname, IMetaStore metaStore, Repository rep, boolean setInternalVariables, VariableSpace parentVariableSpace, OverwritePrompter prompter) throws KettleXMLException, KettleMissingPluginsException {
// Created up front; it is only thrown (see the end of this method) when its details list is not empty.
// NOTE(review): nothing visible in this method adds details to this exception - confirm whether it can
// still be populated or whether the two checks at the end are leftovers.
KettleMissingPluginsException missingPluginsException = new KettleMissingPluginsException(BaseMessages.getString(PKG, "TransMeta.MissingPluginsFoundWhileLoadingTransformation.Exception"));
// Remember this as the primary meta store.
this.metaStore = metaStore;
try {
// Props may not be initialized; in that case null is passed on to shouldOverwrite() below.
Props props = null;
if (Props.isInitialized()) {
props = Props.getInstance();
}
// Use the parent variable space while loading; this is reset with initializeVariablesFrom(null) in the
// finally block of the inner try.
initializeVariablesFrom(parentVariableSpace);
try {
// Clear the transformation
clear();
// Set the filename here so it can be used in variables for ALL aspects of the transformation FIX: PDI-8890
if (null == rep) {
setFilename(fname);
} else {
// Set the repository here so it can be used in variables for ALL aspects of the transformation FIX: PDI-16441
setRepository(rep);
}
// Read the shared objects, from the repository when there is one, otherwise through readSharedObjects().
// A failure here is only logged; loading continues without rethrowing.
try {
sharedObjectsFile = XMLHandler.getTagValue(transnode, "info", "shared_objects_file");
sharedObjects = rep != null ? rep.readTransSharedObjects(this) : readSharedObjects();
} catch (Exception e) {
log.logError(BaseMessages.getString(PKG, "TransMeta.ErrorReadingSharedObjects.Message", e.toString()));
log.logError(Const.getStackTracker(e));
}
// Load the database connections, slave servers, cluster schemas & partition schemas into this object.
//
importFromMetaStore();
// Handle connections
int n = XMLHandler.countNodes(transnode, DatabaseMeta.XML_TAG);
// Collects the names of the connections in the XML that are not flagged as shared.
Set<String> privateTransformationDatabases = new HashSet<>(n);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveConnections", String.valueOf(n)));
}
for (int i = 0; i < n; i++) {
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtConnection") + i);
}
Node nodecon = XMLHandler.getSubNodeByNr(transnode, DatabaseMeta.XML_TAG, i);
DatabaseMeta dbcon = new DatabaseMeta(nodecon);
dbcon.shareVariablesWith(this);
if (!dbcon.isShared()) {
privateTransformationDatabases.add(dbcon.getName());
}
DatabaseMeta exist = findDatabase(dbcon.getName());
if (exist == null) {
addDatabase(dbcon);
} else {
if (!exist.isShared()) {
// The existing connection is not shared: replace it in place (same index) if overwriting is allowed.
// otherwise, we just keep the shared connection.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnectionYN", dbcon.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
int idx = indexOfDatabase(exist);
removeDatabase(idx);
addDatabase(idx, dbcon);
}
}
}
}
setPrivateDatabases(privateTransformationDatabases);
// Read the notes...
Node notepadsnode = XMLHandler.getSubNode(transnode, XML_TAG_NOTEPADS);
int nrnotes = XMLHandler.countNodes(notepadsnode, NotePadMeta.XML_TAG);
for (int i = 0; i < nrnotes; i++) {
Node notepadnode = XMLHandler.getSubNodeByNr(notepadsnode, NotePadMeta.XML_TAG, i);
NotePadMeta ni = new NotePadMeta(notepadnode);
notes.add(ni);
}
// Handle Steps
int s = XMLHandler.countNodes(transnode, StepMeta.XML_TAG);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.ReadingSteps") + s + " steps...");
}
for (int i = 0; i < s; i++) {
Node stepnode = XMLHandler.getSubNodeByNr(transnode, StepMeta.XML_TAG, i);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtStep") + i);
}
StepMeta stepMeta = new StepMeta(stepnode, databases, metaStore);
// for tracing, retain hierarchy
stepMeta.setParentTransMeta(this);
// Steps flagged as missing are registered separately through addMissingTrans().
if (stepMeta.isMissing()) {
addMissingTrans((MissingTrans) stepMeta.getStepMetaInterface());
}
// Check if the step exists and if it's a shared step.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
//
StepMeta check = findStep(stepMeta.getName());
if (check != null) {
if (!check.isShared()) {
// Don't overwrite shared objects
addOrReplaceStep(stepMeta);
} else {
// Just keep the drawn flag and location
check.setDraw(stepMeta.isDrawn());
check.setLocation(stepMeta.getLocation());
}
} else {
// simply add it.
addStep(stepMeta);
}
}
// Read the error handling code of the steps...
//
Node errorHandlingNode = XMLHandler.getSubNode(transnode, XML_TAG_STEP_ERROR_HANDLING);
int nrErrorHandlers = XMLHandler.countNodes(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG);
for (int i = 0; i < nrErrorHandlers; i++) {
Node stepErrorMetaNode = XMLHandler.getSubNodeByNr(errorHandlingNode, StepErrorMeta.XML_ERROR_TAG, i);
StepErrorMeta stepErrorMeta = new StepErrorMeta(this, stepErrorMetaNode, steps);
if (stepErrorMeta.getSourceStep() != null) {
// Attach the error handling metadata to its own source step.
// a bit of a trick, I know.
stepErrorMeta.getSourceStep().setStepErrorMeta(stepErrorMeta);
}
}
// Now that all steps are loaded, let every step metadata look up its info and target steps in the list.
for (int i = 0; i < nrSteps(); i++) {
StepMeta stepMeta = getStep(i);
StepMetaInterface sii = stepMeta.getStepMetaInterface();
if (sii != null) {
sii.searchInfoAndTargetSteps(steps);
}
}
// Handle Hops
//
Node ordernode = XMLHandler.getSubNode(transnode, XML_TAG_ORDER);
n = XMLHandler.countNodes(ordernode, TransHopMeta.XML_HOP_TAG);
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.WeHaveHops") + n + " hops...");
}
for (int i = 0; i < n; i++) {
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.LookingAtHop") + i);
}
Node hopnode = XMLHandler.getSubNodeByNr(ordernode, TransHopMeta.XML_HOP_TAG, i);
TransHopMeta hopinf = new TransHopMeta(hopnode, steps);
// The error-hop flag is derived from the error handling node read above.
hopinf.setErrorHop(isErrorNode(errorHandlingNode, hopnode));
addTransHop(hopinf);
}
//
// get transformation info:
//
Node infonode = XMLHandler.getSubNode(transnode, XML_TAG_INFO);
// Name
//
setName(XMLHandler.getTagValue(infonode, "name"));
// description
//
description = XMLHandler.getTagValue(infonode, "description");
// extended description
//
extendedDescription = XMLHandler.getTagValue(infonode, "extended_description");
// trans version
//
trans_version = XMLHandler.getTagValue(infonode, "trans_version");
// trans status (-1 when the tag is missing or not a number)
//
trans_status = Const.toInt(XMLHandler.getTagValue(infonode, "trans_status"), -1);
String transTypeCode = XMLHandler.getTagValue(infonode, "trans_type");
transformationType = TransformationType.getTransformationTypeByCode(transTypeCode);
// The directory is only resolved when a repository is available and the XML carries a directory path.
if (rep != null) {
String directoryPath = XMLHandler.getTagValue(infonode, "directory");
if (directoryPath != null) {
directory = rep.findDirectory(directoryPath);
if (directory == null) {
// not found
// The root as default
directory = new RepositoryDirectory();
}
}
}
// Read logging table information
//
Node logNode = XMLHandler.getSubNode(infonode, "log");
if (logNode != null) {
// Backward compatibility...
//
Node transLogNode = XMLHandler.getSubNode(logNode, TransLogTable.XML_TAG);
if (transLogNode == null) {
// No dedicated transformation log table node: read the individual tags directly below "log"
// and map them onto the transformation log table (and partly onto the performance log table).
//
transLogTable.findField(TransLogTable.ID.LINES_READ).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "read")));
transLogTable.findField(TransLogTable.ID.LINES_WRITTEN).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "write")));
transLogTable.findField(TransLogTable.ID.LINES_INPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "input")));
transLogTable.findField(TransLogTable.ID.LINES_OUTPUT).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "output")));
transLogTable.findField(TransLogTable.ID.LINES_UPDATED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "update")));
transLogTable.findField(TransLogTable.ID.LINES_REJECTED).setSubject(findStep(XMLHandler.getTagValue(infonode, "log", "rejected")));
transLogTable.setConnectionName(XMLHandler.getTagValue(infonode, "log", "connection"));
transLogTable.setSchemaName(XMLHandler.getTagValue(infonode, "log", "schema"));
transLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "table"));
transLogTable.findField(TransLogTable.ID.ID_BATCH).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "use_batchid")));
transLogTable.findField(TransLogTable.ID.LOG_FIELD).setEnabled("Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "log", "USE_LOGFIELD")));
transLogTable.setLogSizeLimit(XMLHandler.getTagValue(infonode, "log", "size_limit_lines"));
transLogTable.setLogInterval(XMLHandler.getTagValue(infonode, "log", "interval"));
// In this branch the channel id and lines rejected fields are always switched off.
transLogTable.findField(TransLogTable.ID.CHANNEL_ID).setEnabled(false);
transLogTable.findField(TransLogTable.ID.LINES_REJECTED).setEnabled(false);
// The performance log table reuses the connection name of the transformation log table.
performanceLogTable.setConnectionName(transLogTable.getConnectionName());
performanceLogTable.setTableName(XMLHandler.getTagValue(infonode, "log", "step_performance_table"));
} else {
transLogTable.loadXML(transLogNode, databases, steps);
}
// The remaining log tables are each loaded only when their own node is present below "log".
Node perfLogNode = XMLHandler.getSubNode(logNode, PerformanceLogTable.XML_TAG);
if (perfLogNode != null) {
performanceLogTable.loadXML(perfLogNode, databases, steps);
}
Node channelLogNode = XMLHandler.getSubNode(logNode, ChannelLogTable.XML_TAG);
if (channelLogNode != null) {
channelLogTable.loadXML(channelLogNode, databases, steps);
}
Node stepLogNode = XMLHandler.getSubNode(logNode, StepLogTable.XML_TAG);
if (stepLogNode != null) {
stepLogTable.loadXML(stepLogNode, databases, steps);
}
Node metricsLogNode = XMLHandler.getSubNode(logNode, MetricsLogTable.XML_TAG);
if (metricsLogNode != null) {
metricsLogTable.loadXML(metricsLogNode, databases, steps);
}
}
// Maxdate range options... (offset and maxdiff fall back to 0.0 when they cannot be parsed)
String maxdatcon = XMLHandler.getTagValue(infonode, "maxdate", "connection");
maxDateConnection = findDatabase(maxdatcon);
maxDateTable = XMLHandler.getTagValue(infonode, "maxdate", "table");
maxDateField = XMLHandler.getTagValue(infonode, "maxdate", "field");
String offset = XMLHandler.getTagValue(infonode, "maxdate", "offset");
maxDateOffset = Const.toDouble(offset, 0.0);
String mdiff = XMLHandler.getTagValue(infonode, "maxdate", "maxdiff");
maxDateDifference = Const.toDouble(mdiff, 0.0);
// Check the dependencies as far as dates are concerned...
// We calculate BEFORE we run the MAX of these dates
// If the date is larger then enddate, startdate is set to MIN_DATE
//
Node depsNode = XMLHandler.getSubNode(infonode, XML_TAG_DEPENDENCIES);
int nrDeps = XMLHandler.countNodes(depsNode, TransDependency.XML_TAG);
for (int i = 0; i < nrDeps; i++) {
Node depNode = XMLHandler.getSubNodeByNr(depsNode, TransDependency.XML_TAG, i);
TransDependency transDependency = new TransDependency(depNode, databases);
// Only dependencies that have both a database and a field name are kept.
if (transDependency.getDatabase() != null && transDependency.getFieldname() != null) {
addDependency(transDependency);
}
}
// Read the named parameters.
Node paramsNode = XMLHandler.getSubNode(infonode, XML_TAG_PARAMETERS);
int nrParams = XMLHandler.countNodes(paramsNode, "parameter");
for (int i = 0; i < nrParams; i++) {
Node paramNode = XMLHandler.getSubNodeByNr(paramsNode, "parameter", i);
String paramName = XMLHandler.getTagValue(paramNode, "name");
String defaultValue = XMLHandler.getTagValue(paramNode, "default_value");
String descr = XMLHandler.getTagValue(paramNode, "description");
addParameterDefinition(paramName, defaultValue, descr);
}
// Read the partitioning schemas
//
Node partSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_PARTITIONSCHEMAS);
int nrPartSchemas = XMLHandler.countNodes(partSchemasNode, PartitionSchema.XML_TAG);
for (int i = 0; i < nrPartSchemas; i++) {
Node partSchemaNode = XMLHandler.getSubNodeByNr(partSchemasNode, PartitionSchema.XML_TAG, i);
PartitionSchema partitionSchema = new PartitionSchema(partSchemaNode);
// Check if the partition schema exists and if it's a shared one.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
//
PartitionSchema check = findPartitionSchema(partitionSchema.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwritePartitionSchemaYN", partitionSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
addOrReplacePartitionSchema(partitionSchema);
}
}
} else {
partitionSchemas.add(partitionSchema);
}
}
// With the partition schemas in place, hand the list to the partitioning metadata (regular and target)
// of every step so it can set its partition schema.
for (int i = 0; i < nrSteps(); i++) {
StepPartitioningMeta stepPartitioningMeta = getStep(i).getStepPartitioningMeta();
if (stepPartitioningMeta != null) {
stepPartitioningMeta.setPartitionSchemaAfterLoading(partitionSchemas);
}
StepPartitioningMeta targetStepPartitioningMeta = getStep(i).getTargetStepPartitioningMeta();
if (targetStepPartitioningMeta != null) {
targetStepPartitioningMeta.setPartitionSchemaAfterLoading(partitionSchemas);
}
}
// Read the slave servers...
//
Node slaveServersNode = XMLHandler.getSubNode(infonode, XML_TAG_SLAVESERVERS);
int nrSlaveServers = XMLHandler.countNodes(slaveServersNode, SlaveServer.XML_TAG);
for (int i = 0; i < nrSlaveServers; i++) {
Node slaveServerNode = XMLHandler.getSubNodeByNr(slaveServersNode, SlaveServer.XML_TAG, i);
SlaveServer slaveServer = new SlaveServer(slaveServerNode);
// A slave server without a name is logged as an error and skipped.
if (slaveServer.getName() == null) {
log.logError(BaseMessages.getString(PKG, "TransMeta.Log.WarningWhileCreationSlaveServer", slaveServer.getName()));
continue;
}
slaveServer.shareVariablesWith(this);
// Check if the object exists and if it's a shared object.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
SlaveServer check = findSlaveServer(slaveServer.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteSlaveServerYN", slaveServer.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
addOrReplaceSlaveServer(slaveServer);
}
}
} else {
slaveServers.add(slaveServer);
}
}
// Read the cluster schemas
//
Node clusterSchemasNode = XMLHandler.getSubNode(infonode, XML_TAG_CLUSTERSCHEMAS);
int nrClusterSchemas = XMLHandler.countNodes(clusterSchemasNode, ClusterSchema.XML_TAG);
for (int i = 0; i < nrClusterSchemas; i++) {
Node clusterSchemaNode = XMLHandler.getSubNodeByNr(clusterSchemasNode, ClusterSchema.XML_TAG, i);
// The cluster schema is constructed with the slave server list that was completed just above.
ClusterSchema clusterSchema = new ClusterSchema(clusterSchemaNode, slaveServers);
clusterSchema.shareVariablesWith(this);
// Check if the object exists and if it's a shared object.
// If so, then we will keep the shared version, not this one.
// The stored XML is only for backup purposes.
ClusterSchema check = findClusterSchema(clusterSchema.getName());
if (check != null) {
if (!check.isShared()) {
// we don't overwrite shared objects.
if (shouldOverwrite(prompter, props, BaseMessages.getString(PKG, "TransMeta.Message.OverwriteClusterSchemaYN", clusterSchema.getName()), BaseMessages.getString(PKG, "TransMeta.Message.OverwriteConnection.DontShowAnyMoreMessage"))) {
addOrReplaceClusterSchema(clusterSchema);
}
}
} else {
clusterSchemas.add(clusterSchema);
}
}
// With the cluster schemas in place, hand the list to every step so it can set its cluster schema.
for (int i = 0; i < nrSteps(); i++) {
getStep(i).setClusterSchemaAfterLoading(clusterSchemas);
}
// Miscellaneous settings. The numeric ones fall back to the given Const defaults.
// feedback_shown and using_thread_priorities are true unless the tag is explicitly "N";
// unique_connections is only true when the tag is "Y".
String srowset = XMLHandler.getTagValue(infonode, "size_rowset");
sizeRowset = Const.toInt(srowset, Const.ROWS_IN_ROWSET);
sleepTimeEmpty = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_empty"), Const.TIMEOUT_GET_MILLIS);
sleepTimeFull = Const.toInt(XMLHandler.getTagValue(infonode, "sleep_time_full"), Const.TIMEOUT_PUT_MILLIS);
usingUniqueConnections = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "unique_connections"));
feedbackShown = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "feedback_shown"));
feedbackSize = Const.toInt(XMLHandler.getTagValue(infonode, "feedback_size"), Const.ROWS_UPDATE);
usingThreadPriorityManagment = !"N".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "using_thread_priorities"));
// Performance monitoring for steps...
//
capturingStepPerformanceSnapShots = "Y".equalsIgnoreCase(XMLHandler.getTagValue(infonode, "capture_step_performance"));
stepPerformanceCapturingDelay = Const.toLong(XMLHandler.getTagValue(infonode, "step_performance_capturing_delay"), 1000);
stepPerformanceCapturingSizeLimit = XMLHandler.getTagValue(infonode, "step_performance_capturing_size_limit");
// Created user/date (the date is left untouched when the tag is absent)
createdUser = XMLHandler.getTagValue(infonode, "created_user");
String createDate = XMLHandler.getTagValue(infonode, "created_date");
if (createDate != null) {
createdDate = XMLHandler.stringToDate(createDate);
}
// Changed user/date (the date is left untouched when the tag is absent)
modifiedUser = XMLHandler.getTagValue(infonode, "modified_user");
String modDate = XMLHandler.getTagValue(infonode, "modified_date");
if (modDate != null) {
modifiedDate = XMLHandler.stringToDate(modDate);
}
// The partition distribution is read from the transformation node itself, not from the info node.
Node partitionDistNode = XMLHandler.getSubNode(transnode, SlaveStepCopyPartitionDistribution.XML_TAG);
if (partitionDistNode != null) {
slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution(partitionDistNode);
} else {
// leave empty
slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution();
}
// Is this a slave transformation?
//
slaveTransformation = "Y".equalsIgnoreCase(XMLHandler.getTagValue(transnode, "slave_transformation"));
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfStepsReaded") + nrSteps());
log.logDebug(BaseMessages.getString(PKG, "TransMeta.Log.NumberOfHopsReaded") + nrTransHops());
}
sortSteps();
// Load the attribute groups map
//
attributesMap = AttributesUtil.loadAttributes(XMLHandler.getSubNode(transnode, AttributesUtil.XML_TAG));
keyForSessionKey = XMLHandler.stringToBinary(XMLHandler.getTagValue(infonode, "key_for_session_key"));
// NOTE: this comparison is case-sensitive, unlike the equalsIgnoreCase checks used for the other flags.
isKeyPrivate = "Y".equals(XMLHandler.getTagValue(infonode, "is_key_private"));
} catch (KettleXMLException xe) {
// Add the generic "error reading transformation" message around XML errors.
throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), xe);
} catch (KettleException e) {
// Any other Kettle exception is converted to the declared KettleXMLException.
throw new KettleXMLException(e);
} finally {
// Runs on success and on failure: reset the variable space, optionally set the internal variables
// and notify the TransformationMetaLoaded extension point.
initializeVariablesFrom(null);
if (setInternalVariables) {
setInternalKettleVariables();
}
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationMetaLoaded.id, this);
}
} catch (Exception e) {
// Catches everything thrown above, including the KettleXMLException instances created by the inner
// catch blocks, which therefore get wrapped once more. Missing plugins take precedence over the error.
if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
throw missingPluginsException;
} else {
throw new KettleXMLException(BaseMessages.getString(PKG, "TransMeta.Exception.ErrorReadingTransformation"), e);
}
} finally {
// Also report missing plugins when loading itself succeeded. A throw from this finally block replaces
// any exception thrown by the catch block above.
if (!missingPluginsException.getMissingPluginDetailsList().isEmpty()) {
throw missingPluginsException;
}
}
}
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class Trans method prepareExecution.
/**
* Prepares the transformation for execution. This includes setting the arguments and parameters as well as preparing
* and tracking the steps and hops in the transformation.
*
* @param arguments the arguments to use for this transformation
* @throws KettleException in case the transformation could not be prepared (initialized)
*/
public void prepareExecution(String[] arguments) throws KettleException {
setPreparing(true);
startDate = null;
setRunning(false);
log.snap(Metrics.METRIC_TRANSFORMATION_EXECUTION_START);
log.snap(Metrics.METRIC_TRANSFORMATION_INIT_START);
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.TransformationPrepareExecution.id, this);
checkCompatibility();
//
if (arguments != null) {
setArguments(arguments);
}
if (parentTrans != null) {
IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentTrans, log));
} else {
// If there is no parent, one of these still needs to be called to instantiate a new cache
IMetaFileCache.setCacheInstance(transMeta, IMetaFileCache.initialize(parentJob, log));
}
activateParameters();
transMeta.activateParameters();
ConnectionUtil.init(transMeta);
if (transMeta.getName() == null) {
if (transMeta.getFilename() != null) {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForFilename", transMeta.getFilename()));
}
} else {
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.DispacthingStartedForTransformation", transMeta.getName()));
}
if (getArguments() != null) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.NumberOfArgumentsDetected", String.valueOf(getArguments().length)));
}
}
if (isSafeModeEnabled()) {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.SafeModeIsEnabled", transMeta.getName()));
}
}
if (getReplayDate() != null) {
SimpleDateFormat df = new SimpleDateFormat(REPLAY_DATE_FORMAT);
log.logBasic(BaseMessages.getString(PKG, "Trans.Log.ThisIsAReplayTransformation") + df.format(getReplayDate()));
} else {
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.ThisIsNotAReplayTransformation"));
}
}
//
if (servletPrintWriter == null) {
String encoding = System.getProperty("KETTLE_DEFAULT_SERVLET_ENCODING", null);
if (encoding == null) {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
} else {
try {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out, encoding));
} catch (UnsupportedEncodingException ex) {
servletPrintWriter = new PrintWriter(new OutputStreamWriter(System.out));
}
}
}
// Keep track of all the row sets and allocated steps
//
steps = new ArrayList<>();
rowsets = new ArrayList<>();
List<StepMeta> hopsteps = transMeta.getTransHopSteps(false);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.FoundDefferentSteps", String.valueOf(hopsteps.size())));
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingRowsets"));
}
//
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta thisStep = hopsteps.get(i);
if (thisStep.isMapping()) {
// handled and allocated by the mapping step itself.
continue;
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocateingRowsetsForStep", String.valueOf(i), thisStep.getName()));
}
List<StepMeta> nextSteps = transMeta.findNextSteps(thisStep);
int nrTargets = nextSteps.size();
for (int n = 0; n < nrTargets; n++) {
// What's the next step?
StepMeta nextStep = nextSteps.get(n);
if (nextStep.isMapping()) {
// handled and allocated by the mapping step itself.
continue;
}
// How many times do we start the source step?
int thisCopies = thisStep.getCopies();
if (thisCopies < 0) {
//
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.StepCopiesNotCorrectlyDefined", thisStep.getName()));
}
// How many times do we start the target step?
int nextCopies = nextStep.getCopies();
// Are we re-partitioning?
boolean repartitioning;
if (thisStep.isPartitioned()) {
repartitioning = !thisStep.getStepPartitioningMeta().equals(nextStep.getStepPartitioningMeta());
} else {
repartitioning = nextStep.isPartitioned();
}
int nrCopies;
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.copiesInfo", String.valueOf(thisCopies), String.valueOf(nextCopies)));
}
int dispatchType;
if (thisCopies == 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_1_1;
nrCopies = 1;
} else if (thisCopies == 1 && nextCopies > 1) {
dispatchType = TYPE_DISP_1_N;
nrCopies = nextCopies;
} else if (thisCopies > 1 && nextCopies == 1) {
dispatchType = TYPE_DISP_N_1;
nrCopies = thisCopies;
} else if (thisCopies == nextCopies && !repartitioning) {
dispatchType = TYPE_DISP_N_N;
nrCopies = nextCopies;
} else {
// > 1!
dispatchType = TYPE_DISP_N_M;
nrCopies = nextCopies;
}
//
if (dispatchType != TYPE_DISP_N_M) {
for (int c = 0; c < nrCopies; c++) {
RowSet rowSet;
switch(transMeta.getTransformationType()) {
case Normal:
// This is a temporary patch until the batching rowset has proven
// to be working in all situations.
// Currently there are stalling problems when dealing with small
// amounts of rows.
//
Boolean batchingRowSet = ValueMetaString.convertStringToBoolean(System.getProperty(Const.KETTLE_BATCHING_ROWSET));
if (batchingRowSet != null && batchingRowSet.booleanValue()) {
rowSet = new BlockingBatchingRowSet(transMeta.getSizeRowset());
} else {
rowSet = new BlockingRowSet(transMeta.getSizeRowset());
}
break;
case SerialSingleThreaded:
rowSet = new SingleRowRowSet();
break;
case SingleThreaded:
rowSet = new QueueRowSet();
break;
default:
throw new KettleException("Unhandled transformation type: " + transMeta.getTransformationType());
}
switch(dispatchType) {
case TYPE_DISP_1_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), 0);
break;
case TYPE_DISP_1_N:
rowSet.setThreadNameFromToCopy(thisStep.getName(), 0, nextStep.getName(), c);
break;
case TYPE_DISP_N_1:
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), 0);
break;
case TYPE_DISP_N_N:
rowSet.setThreadNameFromToCopy(thisStep.getName(), c, nextStep.getName(), c);
break;
default:
break;
}
rowsets.add(rowSet);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
}
}
} else {
// distribution...
for (int s = 0; s < thisCopies; s++) {
for (int t = 0; t < nextCopies; t++) {
BlockingRowSet rowSet = new BlockingRowSet(transMeta.getSizeRowset());
rowSet.setThreadNameFromToCopy(thisStep.getName(), s, nextStep.getName(), t);
rowsets.add(rowSet);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.TransformationAllocatedNewRowset", rowSet.toString()));
}
}
}
}
}
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatedRowsets", String.valueOf(rowsets.size()), String.valueOf(i), thisStep.getName()) + " ");
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.AllocatingStepsAndStepData"));
}
//
for (int i = 0; i < hopsteps.size(); i++) {
StepMeta stepMeta = hopsteps.get(i);
String stepid = stepMeta.getStepID();
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationIsToAllocateStep", stepMeta.getName(), stepid));
}
// How many copies are launched of this step?
int nrCopies = stepMeta.getCopies();
if (log.isDebug()) {
log.logDebug(BaseMessages.getString(PKG, "Trans.Log.StepHasNumberRowCopies", String.valueOf(nrCopies)));
}
// At least run once...
for (int c = 0; c < nrCopies; c++) {
// Make sure we haven't started it yet!
if (!hasStepStarted(stepMeta.getName(), c)) {
StepMetaDataCombi combi = new StepMetaDataCombi();
combi.stepname = stepMeta.getName();
combi.copy = c;
// The meta-data
combi.stepMeta = stepMeta;
combi.meta = stepMeta.getStepMetaInterface();
// Allocate the step data
StepDataInterface data = combi.meta.getStepData();
combi.data = data;
// Allocate the step
StepInterface step = combi.meta.getStep(stepMeta, data, c, transMeta, this);
// Copy the variables of the transformation to the step...
// don't share. Each copy of the step has its own variables.
//
step.initializeVariablesFrom(this);
step.setUsingThreadPriorityManagment(transMeta.isUsingThreadPriorityManagment());
// Pass the connected repository & metaStore to the steps runtime
//
step.setRepository(repository);
step.setMetaStore(metaStore);
// things as well...
if (stepMeta.isPartitioned()) {
List<String> partitionIDs = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
if (partitionIDs != null && !partitionIDs.isEmpty()) {
// Pass the partition ID
step.setPartitionID(partitionIDs.get(c));
// to the step
}
}
// Save the step too
combi.step = step;
// /
if (combi.step instanceof LoggingObjectInterface) {
LogChannelInterface logChannel = combi.step.getLogChannel();
logChannel.setLogLevel(logLevel);
logChannel.setGatheringMetrics(log.isGatheringMetrics());
}
// Add to the bunch...
steps.add(combi);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.TransformationHasAllocatedANewStep", stepMeta.getName(), String.valueOf(c)));
}
}
}
}
//
for (int s = 0; s < steps.size(); s++) {
StepMetaDataCombi combi = steps.get(s);
if (combi.stepMeta.isDoingErrorHandling()) {
combi.step.identifyErrorOutput();
}
}
// Now (optionally) write start log record!
// Make sure we synchronize appropriately to avoid duplicate batch IDs.
//
Object syncObject = this;
if (parentJob != null) {
// parallel execution in a job
syncObject = parentJob;
}
if (parentTrans != null) {
// multiple sub-transformations
syncObject = parentTrans;
}
synchronized (syncObject) {
calculateBatchIdAndDateRange();
beginProcessing();
}
//
for (int i = 0; i < steps.size(); i++) {
StepMetaDataCombi sid = steps.get(i);
StepMeta stepMeta = sid.stepMeta;
StepInterface baseStep = sid.step;
baseStep.setPartitioned(stepMeta.isPartitioned());
// Now let's take a look at the source and target relation
//
// If this source step is not partitioned, and the target step is: it
// means we need to re-partition the incoming data.
// If both steps are partitioned on the same method and schema, we don't
// need to re-partition
// If both steps are partitioned on a different method or schema, we need
// to re-partition as well.
// If both steps are not partitioned, we don't need to re-partition
//
boolean isThisPartitioned = stepMeta.isPartitioned();
PartitionSchema thisPartitionSchema = null;
if (isThisPartitioned) {
thisPartitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
}
boolean isNextPartitioned = false;
StepPartitioningMeta nextStepPartitioningMeta = null;
PartitionSchema nextPartitionSchema = null;
List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
int nrNext = nextSteps.size();
for (int p = 0; p < nrNext; p++) {
StepMeta nextStep = nextSteps.get(p);
if (nextStep.isPartitioned()) {
isNextPartitioned = true;
nextStepPartitioningMeta = nextStep.getStepPartitioningMeta();
nextPartitionSchema = nextStepPartitioningMeta.getPartitionSchema();
}
}
baseStep.setRepartitioning(StepPartitioningMeta.PARTITIONING_METHOD_NONE);
//
if ((!isThisPartitioned && isNextPartitioned) || (isThisPartitioned && isNextPartitioned && !thisPartitionSchema.equals(nextPartitionSchema))) {
baseStep.setRepartitioning(nextStepPartitioningMeta.getMethodType());
}
// For partitioning to a set of remove steps (repartitioning from a master
// to a set or remote output steps)
//
StepPartitioningMeta targetStepPartitioningMeta = baseStep.getStepMeta().getTargetStepPartitioningMeta();
if (targetStepPartitioningMeta != null) {
baseStep.setRepartitioning(targetStepPartitioningMeta.getMethodType());
}
}
setPreparing(false);
setInitializing(true);
//
if (isMonitored() && steps.size() < 150) {
doTopologySortOfSteps();
}
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.InitialisingSteps", String.valueOf(steps.size())));
}
StepInitThread[] initThreads = new StepInitThread[steps.size()];
Thread[] threads = new Thread[steps.size()];
//
for (int i = 0; i < steps.size(); i++) {
final StepMetaDataCombi sid = steps.get(i);
// Do the init code in the background!
// Init all steps at once, but ALL steps need to finish before we can
// continue properly!
//
initThreads[i] = new StepInitThread(sid, log);
// Put it in a separate thread!
//
threads[i] = new Thread(initThreads[i]);
threads[i].setName("init of " + sid.stepname + "." + sid.copy + " (" + threads[i].getName() + ")");
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepBeforeInitialize.id, initThreads[i]);
threads[i].start();
}
for (int i = 0; i < threads.length; i++) {
try {
threads[i].join();
ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.StepAfterInitialize.id, initThreads[i]);
} catch (Exception ex) {
log.logError("Error with init thread: " + ex.getMessage(), ex.getMessage());
log.logError(Const.getStackTracker(ex));
}
}
setInitializing(false);
boolean ok = true;
//
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
if (!initThreads[i].isOk()) {
log.logError(BaseMessages.getString(PKG, "Trans.Log.StepFailedToInit", combi.stepname + "." + combi.copy));
combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
ok = false;
} else {
combi.data.setStatus(StepExecutionStatus.STATUS_IDLE);
if (log.isDetailed()) {
log.logDetailed(BaseMessages.getString(PKG, "Trans.Log.StepInitialized", combi.stepname + "." + combi.copy));
}
}
}
if (!ok) {
// One or more steps failed on initialization.
// Transformation is now stopped.
setStopped(true);
//
for (int i = 0; i < initThreads.length; i++) {
StepMetaDataCombi combi = initThreads[i].getCombi();
// Dispose will overwrite the status, but we set it back right after
// this.
combi.step.dispose(combi.meta, combi.data);
if (initThreads[i].isOk()) {
combi.data.setStatus(StepExecutionStatus.STATUS_HALTED);
} else {
combi.data.setStatus(StepExecutionStatus.STATUS_STOPPED);
}
}
// Just for safety, fire the trans finished listeners...
try {
fireTransFinishedListeners();
} catch (KettleException e) {
// listeners produces errors
log.logError(BaseMessages.getString(PKG, "Trans.FinishListeners.Exception"));
// we will not pass this exception up to prepareExecution() entry point.
} finally {
// Flag the transformation as finished even if exception was thrown
setFinished(true);
}
//
if (preview) {
String logText = KettleLogStore.getAppender().getBuffer(getLogChannelId(), true).toString();
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR + logText);
} else {
throw new KettleException(BaseMessages.getString(PKG, "Trans.Log.FailToInitializeAtLeastOneStep") + Const.CR);
}
}
log.snap(Metrics.METRIC_TRANSFORMATION_INIT_STOP);
KettleEnvironment.setExecutionInformation(this, repository);
setReadyToStart(true);
}
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransSplitter method verifySlavePartitioningConfiguration.
/**
 * Makes sure the slave transformation contains the slave-specific partition schema needed by the given step.
 * Each step is handled at most once per slave transformation; handled steps are remembered in
 * {@code slaveStepPartitionFlag}.
 *
 * @param slave
 *          the slave transformation that may receive a new partition schema
 * @param stepMeta
 *          the step whose partitioning configuration is verified
 * @param clusterSchema
 *          the cluster schema (not used by this method)
 * @param slaveServer
 *          the slave server used to look up the partitions in {@code slaveServerPartitionsMap}
 */
private void verifySlavePartitioningConfiguration(TransMeta slave, StepMeta stepMeta, ClusterSchema clusterSchema, SlaveServer slaveServer) {
  // Fetch (or lazily create) the bookkeeping map of steps already handled for this slave transformation.
  Map<StepMeta, String> handledSteps = slaveStepPartitionFlag.get(slave);
  if (handledSteps == null) {
    handledSteps = new Hashtable<StepMeta, String>();
    slaveStepPartitionFlag.put(slave, handledSteps);
  }
  if (handledSteps.get(stepMeta) != null) {
    // This step was verified before, nothing left to do.
    return;
  }

  StepPartitioningMeta partitioning = stepMeta.getStepPartitioningMeta();
  boolean hasUsablePartitioning = partitioning != null
    && partitioning.getMethodType() != StepPartitioningMeta.PARTITIONING_METHOD_NONE
    && partitioning.getPartitionSchema() != null;

  // Only consult the per-slave partition distribution for steps that are really partitioned.
  Map<PartitionSchema, List<String>> partitionsBySchema = null;
  if (hasUsablePartitioning) {
    partitionsBySchema = slaveServerPartitionsMap.get(slaveServer);
  }

  if (partitionsBySchema != null) {
    PartitionSchema sourceSchema = partitioning.getPartitionSchema();
    List<String> partitionIds = partitionsBySchema.get(sourceSchema);
    if (partitionIds != null) {
      // Partitions were assigned to this slave: register a slave-specific schema unless one already exists.
      String slaveSchemaName = createSlavePartitionSchemaName(sourceSchema.getName());
      if (slave.findPartitionSchema(slaveSchemaName) == null) {
        slave.getPartitionSchemas().add(new PartitionSchema(slaveSchemaName, partitionIds));
      }
    }
  }

  // Remember that this step has been processed.
  handledSteps.put(stepMeta, "Y");
}
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransSplitter method generateSlavePartitionSchemas.
/**
 * Divides the partitions of every partitioned, clustered reference step over the slave servers of that step's
 * cluster schema. The result is stored in {@code slaveServerPartitionsMap}, which maps each slave server to the
 * partition schemas it serves and, per schema, the partition IDs assigned to that slave.
 * <p>
 * A step is skipped when it has no partitioning meta, uses {@code PARTITIONING_METHOD_NONE}, has no cluster
 * schema, has no partition schema, or when its cluster schema reports zero slaves.
 *
 * @throws KettleException
 *           if a partition schema has fewer partitions than the cluster schema has slave servers
 */
private void generateSlavePartitionSchemas() throws KettleException {
  slaveServerPartitionsMap = new Hashtable<SlaveServer, Map<PartitionSchema, List<String>>>();
  for (StepMeta stepMeta : referenceSteps) {
    StepPartitioningMeta stepPartitioningMeta = stepMeta.getStepPartitioningMeta();
    if (stepPartitioningMeta == null) {
      continue;
    }
    if (stepPartitioningMeta.getMethodType() == StepPartitioningMeta.PARTITIONING_METHOD_NONE) {
      continue;
    }
    ClusterSchema clusterSchema = stepMeta.getClusterSchema();
    if (clusterSchema == null) {
      continue;
    }
    // A partitioning method without a schema gives us nothing to distribute. Skip the step instead of
    // failing with a NullPointerException; verifySlavePartitioningConfiguration() guards the same case.
    PartitionSchema originalSchema = stepPartitioningMeta.getPartitionSchema();
    if (originalSchema == null) {
      continue;
    }
    // Make a copy of the partition schema because we might change the object.
    // Let's not alter the original transformation.
    // The match is done on name, and the name is preserved in this case, so it should be safe to do so.
    // Also, all cloned steps re-match with the cloned schema name afterwards...
    //
    PartitionSchema partitionSchema = (PartitionSchema) originalSchema.clone();
    int nrSlaves = clusterSchema.findNrSlaves();
    if (nrSlaves == 0) {
      // no slaves: ignore this situation too
      continue;
    }
    if (partitionSchema.isDynamicallyDefined()) {
      partitionSchema.expandPartitionsDynamically(nrSlaves, originalTransformation);
    }
    // Read the partition IDs only after the (optional) dynamic expansion above.
    List<String> partitionIds = partitionSchema.getPartitionIDs();
    int nrPartitions = partitionIds.size();
    if (nrPartitions < nrSlaves) {
      throw new KettleException("It doesn't make sense to have a partitioned, clustered step with less partitions (" + nrPartitions + ") than that there are slave servers (" + nrSlaves + ")");
    }
    // Hand out the partitions round-robin over the servers of the cluster schema.
    List<SlaveServer> slaveServers = clusterSchema.getSlaveServers();
    int slaveServerNr = 0;
    for (String partitionId : partitionIds) {
      SlaveServer slaveServer = slaveServers.get(slaveServerNr);
      if (slaveServer.isMaster()) {
        // Step over a master entry, wrapping around at the end of the list.
        slaveServerNr++;
        if (slaveServerNr >= slaveServers.size()) {
          slaveServerNr = 0;
        }
        slaveServer = slaveServers.get(slaveServerNr);
      }
      Map<PartitionSchema, List<String>> schemaPartitionsMap = slaveServerPartitionsMap.get(slaveServer);
      if (schemaPartitionsMap == null) {
        // Add the schema-partitions map to the slave server
        schemaPartitionsMap = new HashMap<PartitionSchema, List<String>>();
        slaveServerPartitionsMap.put(slaveServer, schemaPartitionsMap);
      }
      // See if we find a list of partitions
      List<String> partitions = schemaPartitionsMap.get(partitionSchema);
      if (partitions == null) {
        partitions = new ArrayList<String>();
        schemaPartitionsMap.put(partitionSchema, partitions);
      }
      if (!partitions.contains(partitionId)) {
        partitions.add(partitionId);
      }
      // Switch to next slave, wrapping around at the end of the list.
      slaveServerNr++;
      if (slaveServerNr >= slaveServers.size()) {
        slaveServerNr = 0;
      }
    }
  }
}
use of org.pentaho.di.trans.step.StepPartitioningMeta in project pentaho-kettle by pentaho.
the class TransSplitter method addSlaveCopy.
/**
 * Clones a step of the original transformation and adds the clone to the given transformation. When the clone
 * is partitioned and the given transformation holds a partition schema under the slave-specific name derived
 * from the step's own schema name, the clone is switched over to that schema. A partitioned clone is always
 * limited to a single copy.
 *
 * @param transMeta
 *          the transformation the copy is added to and in which the slave partition schema is looked up
 * @param stepMeta
 *          the step to copy / clone
 * @param slaveServer
 *          the slave server (not used by this method)
 * @return the copy of the specified step that was added to {@code transMeta}
 */
private StepMeta addSlaveCopy(TransMeta transMeta, StepMeta stepMeta, SlaveServer slaveServer) {
  final StepMeta slaveStep = (StepMeta) stepMeta.clone();
  if (slaveStep.isPartitioned()) {
    final StepPartitioningMeta partitioning = slaveStep.getStepPartitioningMeta();
    final String slaveSchemaName = createSlavePartitionSchemaName(partitioning.getPartitionSchema().getName());
    final PartitionSchema slaveSchema = transMeta.findPartitionSchema(slaveSchemaName);
    if (slaveSchema != null) {
      // Re-point the clone at the slave-specific schema when the transformation has one.
      partitioning.setPartitionSchema(slaveSchema);
    }
    // Always just start a single copy on the slave server...
    // Otherwise the confusion w.r.t. partitioning & re-partitioning would be complete.
    slaveStep.setCopies(1);
  }
  transMeta.addStep(slaveStep);
  return slaveStep;
}
Aggregations