use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
the class JobEntryTrans method execute.
/**
 * Runs the transformation referenced by this job entry and reports the outcome in the supplied {@link Result}.
 * <p>
 * The transformation metadata is loaded once. It is then executed either a single time or, when
 * {@code execPerRow} is set, once for every row found in the incoming result. Each run is dispatched one of three
 * ways: clustered (when {@code clustering} is set), on a remote slave server (when one is configured or supplied
 * by the run configuration), or locally in this JVM.
 * <p>
 * When {@code setLogfile} is enabled, the log channel of this entry is additionally written to the configured log
 * file and that file is registered as a result file.
 *
 * @param result The result of the previous execution; it is updated in place and returned
 * @param nr the job entry number
 * @return The Result of the execution; its result flag is {@code true} only when no errors were counted
 * @throws KettleException declared by the contract; most failures are logged and reported through the error count
 *         of the returned result instead
 */
@Override
public Result execute(Result result, int nr) throws KettleException {
  result.setEntryNr(nr);
  LogChannelFileWriter logChannelFileWriter = null;
  LogLevel transLogLevel = parentJob.getLogLevel();
  // Set the embedded NamedCluster MetaStore provider key so that it can be passed to VFS
  if (parentJobMeta.getNamedClusterEmbedManager() != null) {
    parentJobMeta.getNamedClusterEmbedManager()
        .passEmbeddedMetastoreKey(this, parentJobMeta.getEmbeddedMetastoreProviderKey());
  }
  String realLogFilename = "";
  if (setLogfile) {
    transLogLevel = logFileLevel;
    realLogFilename = environmentSubstitute(getLogFilename());
    // if we do not have one, we must fail
    if (Utils.isEmpty(realLogFilename)) {
      logError(BaseMessages.getString(PKG, "JobTrans.Exception.LogFilenameMissing"));
      result.setNrErrors(1);
      result.setResult(false);
      return result;
    }
    // create parent folder?
    if (!FileUtil.createParentFolder(PKG, realLogFilename, createParentFolder, this.getLogChannel(), this)) {
      result.setNrErrors(1);
      result.setResult(false);
      return result;
    }
    try {
      logChannelFileWriter = new LogChannelFileWriter(this.getLogChannelId(),
          KettleVFS.getFileObject(realLogFilename, this), setAppendLogfile);
      logChannelFileWriter.startLogging();
    } catch (KettleException e) {
      logError(BaseMessages.getString(PKG, "JobTrans.Error.UnableOpenAppender", realLogFilename, e.toString()));
      logError(Const.getStackTracker(e));
      result.setNrErrors(1);
      result.setResult(false);
      return result;
    }
  }
  // Log (at detailed level) which transformation is about to be opened, depending on how it is specified.
  //
  switch (specificationMethod) {
    case FILENAME:
      if (isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobTrans.Log.OpeningTrans", environmentSubstitute(getFilename())));
      }
      break;
    case REPOSITORY_BY_NAME:
      if (isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobTrans.Log.OpeningTransInDirec",
            environmentSubstitute(getFilename()), environmentSubstitute(directory)));
      }
      break;
    case REPOSITORY_BY_REFERENCE:
      if (isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobTrans.Log.OpeningTransByReference", transObjectId));
      }
      break;
    default:
      break;
  }
  // Load the transformation only once for the complete loop!
  // Throws an exception if it was not possible to load the transformation. For example, the XML file doesn't exist
  // or the repository is down.
  // Log the stack trace and return an error condition from this
  //
  TransMeta transMeta = null;
  try {
    transMeta = getTransMeta(rep, metaStore, this);
  } catch (KettleException e) {
    logError(BaseMessages.getString(PKG, "JobTrans.Exception.UnableToRunJob", parentJobMeta.getName(), getName(),
        StringUtils.trim(e.getMessage())), e);
    result.setNrErrors(1);
    result.setResult(false);
    return result;
  }
  int iteration = 0;
  String[] args1 = arguments;
  if (args1 == null || args1.length == 0) {
    // No arguments set, look at the parent job.
    args1 = parentJob.getArguments();
  }
  // For the moment only do variable translation at the start of a job, not
  // for every input row (if that would be switched on). This is for safety,
  // the real argument setting is later on.
  //
  String[] args = null;
  if (args1 != null) {
    args = new String[args1.length];
    for (int idx = 0; idx < args1.length; idx++) {
      args[idx] = environmentSubstitute(args1[idx]);
    }
  }
  RowMetaAndData resultRow = null;
  boolean first = true;
  // Snapshot of the incoming rows: result.getRows() is cleared/modified inside the loop.
  List<RowMetaAndData> rows = new ArrayList<RowMetaAndData>(result.getRows());
  // NOTE(review): '&&' binds tighter than '||', so the parentJob.isStopped() check only guards the per-row
  // branch; a single (non per-row) execution is attempted even when the parent job is already stopped.
  // Left as-is to preserve behavior - confirm whether that is intended.
  while ((first && !execPerRow)
      || (execPerRow && rows != null && iteration < rows.size() && result.getNrErrors() == 0)
      && !parentJob.isStopped()) {
    //
    if (execPerRow) {
      result.getRows().clear();
    }
    if (rows != null && execPerRow) {
      resultRow = rows.get(iteration);
    } else {
      resultRow = null;
    }
    NamedParams namedParam = new NamedParamsDefault();
    if (parameters != null) {
      for (int idx = 0; idx < parameters.length; idx++) {
        if (!Utils.isEmpty(parameters[idx])) {
          // We have a parameter
          //
          namedParam.addParameterDefinition(parameters[idx], "", "Job entry runtime");
          if (Utils.isEmpty(Const.trim(parameterFieldNames[idx]))) {
            // There is no field name specified.
            //
            String value = Const.NVL(environmentSubstitute(parameterValues[idx]), "");
            namedParam.setParameterValue(parameters[idx], value);
          } else {
            // something filled in, in the field column...
            //
            String value = "";
            if (resultRow != null) {
              value = resultRow.getString(parameterFieldNames[idx], "");
            }
            namedParam.setParameterValue(parameters[idx], value);
          }
        }
      }
    }
    first = false;
    Result previousResult = result;
    try {
      if (isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobTrans.StartingTrans", getFilename(), getName(),
            getDescription()));
      }
      if (clearResultRows) {
        previousResult.setRows(new ArrayList<RowMetaAndData>());
      }
      if (clearResultFiles) {
        previousResult.getResultFiles().clear();
      }
      /*
       * Set one or more "result" rows on the transformation...
       */
      if (execPerRow) {
        if (argFromPrevious) {
          // Copy the input row to the (command line) arguments
          args = null;
          if (resultRow != null) {
            args = new String[resultRow.size()];
            for (int i = 0; i < resultRow.size(); i++) {
              args[i] = resultRow.getString(i, null);
            }
          }
        } else {
          // Just pass a single row
          List<RowMetaAndData> newList = new ArrayList<RowMetaAndData>();
          newList.add(resultRow);
          // This previous result rows list can be either empty or not.
          // Depending on the checkbox "clear result rows"
          // In this case, it would execute the transformation with one extra row each time
          // Can't figure out a real use-case for it, but hey, who am I to decide that, right?
          // :-)
          //
          previousResult.getRows().addAll(newList);
        }
        if (paramsFromPrevious) {
          if (parameters != null) {
            for (int idx = 0; idx < parameters.length; idx++) {
              if (!Utils.isEmpty(parameters[idx])) {
                // We have a parameter
                if (Utils.isEmpty(Const.trim(parameterFieldNames[idx]))) {
                  namedParam.setParameterValue(parameters[idx],
                      Const.NVL(environmentSubstitute(parameterValues[idx]), ""));
                } else {
                  String fieldValue = "";
                  if (resultRow != null) {
                    fieldValue = resultRow.getString(parameterFieldNames[idx], "");
                  }
                  // Get the value from the input stream
                  namedParam.setParameterValue(parameters[idx], Const.NVL(fieldValue, ""));
                }
              }
            }
          }
        }
      } else {
        if (argFromPrevious) {
          // Only put the first Row on the arguments
          args = null;
          if (resultRow != null) {
            args = new String[resultRow.size()];
            for (int i = 0; i < resultRow.size(); i++) {
              args[i] = resultRow.getString(i, null);
            }
          }
        }
        if (paramsFromPrevious) {
          // Copy the input the parameters
          if (parameters != null) {
            for (int idx = 0; idx < parameters.length; idx++) {
              if (!Utils.isEmpty(parameters[idx])) {
                // We have a parameter
                if (Utils.isEmpty(Const.trim(parameterFieldNames[idx]))) {
                  namedParam.setParameterValue(parameters[idx],
                      Const.NVL(environmentSubstitute(parameterValues[idx]), ""));
                } else {
                  String fieldValue = "";
                  if (resultRow != null) {
                    fieldValue = resultRow.getString(parameterFieldNames[idx], "");
                  }
                  // Get the value from the input stream
                  namedParam.setParameterValue(parameters[idx], Const.NVL(fieldValue, ""));
                }
              }
            }
          }
        }
      }
      // Handle the parameters...
      //
      transMeta.clearParameters();
      String[] parameterNames = transMeta.listParameters();
      StepWithMappingMeta.activateParams(transMeta, transMeta, this, parameterNames, parameters, parameterValues);
      // doFallback stays true unless a run configuration decided where to execute.
      boolean doFallback = true;
      SlaveServer remoteSlaveServer = null;
      TransExecutionConfiguration executionConfiguration = new TransExecutionConfiguration();
      if (!Utils.isEmpty(runConfiguration)) {
        log.logBasic(BaseMessages.getString(PKG, "JobTrans.RunConfig.Message"), runConfiguration);
        runConfiguration = environmentSubstitute(runConfiguration);
        executionConfiguration.setRunConfiguration(runConfiguration);
        try {
          ExtensionPointHandler.callExtensionPoint(log, KettleExtensionPoint.SpoonTransBeforeStart.id,
              new Object[] { executionConfiguration, parentJob.getJobMeta(), transMeta, rep });
          // None of the three execution modes is selected after the extension point ran:
          // report success without executing anything here.
          if (!executionConfiguration.isExecutingLocally() && !executionConfiguration.isExecutingRemotely()
              && !executionConfiguration.isExecutingClustered()) {
            result.setResult(true);
            return result;
          }
          clustering = executionConfiguration.isExecutingClustered();
          remoteSlaveServer = executionConfiguration.getRemoteServer();
          doFallback = false;
        } catch (KettleException e) {
          log.logError(e.getMessage(), getName());
          result.setNrErrors(1);
          result.setResult(false);
          return result;
        }
      }
      if (doFallback) {
        // No run configuration: resolve the remote slave server from the job entry settings, if one is named.
        //
        if (!Utils.isEmpty(remoteSlaveServerName)) {
          String realRemoteSlaveServerName = environmentSubstitute(remoteSlaveServerName);
          remoteSlaveServer = parentJob.getJobMeta().findSlaveServer(realRemoteSlaveServerName);
          if (remoteSlaveServer == null) {
            throw new KettleException(BaseMessages.getString(PKG,
                "JobTrans.Exception.UnableToFindRemoteSlaveServer", realRemoteSlaveServerName));
          }
        }
      }
      //
      if (clustering) {
        executionConfiguration.setClusterPosting(true);
        executionConfiguration.setClusterPreparing(true);
        executionConfiguration.setClusterStarting(true);
        executionConfiguration.setClusterShowingTransformation(false);
        executionConfiguration.setSafeModeEnabled(false);
        executionConfiguration.setRepository(rep);
        executionConfiguration.setLogLevel(transLogLevel);
        executionConfiguration.setPreviousResult(previousResult);
        // Also pass the variables from the transformation into the execution configuration
        // That way it can go over the HTTP connection to the slave server.
        //
        executionConfiguration.setVariables(transMeta);
        // Also set the arguments...
        //
        executionConfiguration.setArgumentStrings(args);
        if (parentJob.getJobMeta().isBatchIdPassed()) {
          executionConfiguration.setPassedBatchId(parentJob.getPassedBatchId());
        }
        TransSplitter transSplitter = null;
        long errors = 0;
        try {
          transSplitter = Trans.executeClustered(transMeta, executionConfiguration);
          // Monitor the running transformations, wait until they are done.
          // Also kill them all if anything goes bad
          // Also clean up afterwards...
          //
          errors += Trans.monitorClusteredTransformation(log, transSplitter, parentJob);
        } catch (Exception e) {
          logError("Error during clustered execution. Cleaning up clustered execution.", e);
          // In case something goes wrong, make sure to clean up afterwards!
          //
          errors++;
          if (transSplitter != null) {
            Trans.cleanupCluster(log, transSplitter);
          } else {
            // Try to clean anyway...
            //
            SlaveServer master = null;
            for (StepMeta stepMeta : transMeta.getSteps()) {
              if (stepMeta.isClustered()) {
                for (SlaveServer slaveServer : stepMeta.getClusterSchema().getSlaveServers()) {
                  if (slaveServer.isMaster()) {
                    master = slaveServer;
                    break;
                  }
                }
              }
            }
            if (master != null) {
              master.deAllocateServerSockets(transMeta.getName(), null);
            }
          }
        }
        result.clear();
        if (transSplitter != null) {
          Result clusterResult = Trans.getClusteredTransformationResult(log, transSplitter, parentJob,
              executionConfiguration.isLogRemoteExecutionLocally());
          result.add(clusterResult);
        }
        result.setNrErrors(result.getNrErrors() + errors);
      } else if (remoteSlaveServer != null) {
        // Execute this transformation remotely
        //
        // Make sure we can parameterize the slave server connection
        //
        remoteSlaveServer.shareVariablesWith(this);
        // Remote execution...
        //
        executionConfiguration.setPreviousResult(previousResult.clone());
        executionConfiguration.setArgumentStrings(args);
        executionConfiguration.setVariables(this);
        executionConfiguration.setRemoteServer(remoteSlaveServer);
        executionConfiguration.setLogLevel(transLogLevel);
        executionConfiguration.setRepository(rep);
        executionConfiguration.setLogFileName(realLogFilename);
        executionConfiguration.setSetAppendLogfile(setAppendLogfile);
        executionConfiguration.setSetLogfile(setLogfile);
        Map<String, String> params = executionConfiguration.getParams();
        for (String param : transMeta.listParameters()) {
          // Precedence: explicit parameter value, then parameter default, then a variable of the same name.
          String value = Const.NVL(transMeta.getParameterValue(param),
              Const.NVL(transMeta.getParameterDefault(param), transMeta.getVariable(param)));
          params.put(param, value);
        }
        if (parentJob.getJobMeta().isBatchIdPassed()) {
          executionConfiguration.setPassedBatchId(parentJob.getPassedBatchId());
        }
        // Send the XML over to the slave server
        // Also start the transformation over there...
        //
        String carteObjectId = Trans.sendToSlaveServer(transMeta, executionConfiguration, rep, metaStore);
        // Now start the monitoring...
        //
        SlaveServerTransStatus transStatus = null;
        while (!parentJob.isStopped() && waitingToFinish) {
          try {
            transStatus = remoteSlaveServer.getTransStatus(transMeta.getName(), carteObjectId, 0);
            if (!transStatus.isRunning()) {
              // The transformation is finished, get the result...
              //
              // get the status with the result ( we don't do it above because of changing PDI-15781)
              transStatus = remoteSlaveServer.getTransStatus(transMeta.getName(), carteObjectId, 0, true);
              Result remoteResult = transStatus.getResult();
              result.clear();
              result.add(remoteResult);
              // A remotely stopped transformation counts as an error.
              //
              if (remoteResult.isStopped()) {
                //
                result.setNrErrors(result.getNrErrors() + 1);
              }
              // Make sure to clean up : write a log record etc, close any left-over sockets etc.
              //
              remoteSlaveServer.cleanupTransformation(transMeta.getName(), carteObjectId);
              break;
            }
          } catch (Exception e1) {
            logError(BaseMessages.getString(PKG, "JobTrans.Error.UnableContactSlaveServer",
                "" + remoteSlaveServer, transMeta.getName()), e1);
            result.setNrErrors(result.getNrErrors() + 1L);
            // Stop looking too, chances are too low the server will come back on-line
            break;
          }
          // sleep for 2 seconds
          try {
            Thread.sleep(2000);
          } catch (InterruptedException e) {
            // Ignore
          }
        }
        if (parentJob.isStopped()) {
          // See if we need to stop the remote execution here...
          //
          if (transStatus == null || transStatus.isRunning()) {
            // No status may have been retrieved yet (job stopped before the first poll, not waiting for
            // completion, or the first status call failed). Fall back to the id handed out when the
            // transformation was sent to the slave instead of dereferencing a null status.
            //
            String remoteObjectId = transStatus != null ? transStatus.getId() : carteObjectId;
            // Try a remote abort ...
            //
            remoteSlaveServer.stopTransformation(transMeta.getName(), remoteObjectId);
            // And a cleanup...
            //
            remoteSlaveServer.cleanupTransformation(transMeta.getName(), remoteObjectId);
            // Set an error state!
            //
            result.setNrErrors(result.getNrErrors() + 1L);
          }
        }
      } else {
        // Execute this transformation on the local machine
        //
        // Create the transformation from meta-data
        //
        final TransMeta meta = transMeta;
        trans = new TransSupplier(transMeta, log, () -> new Trans(meta)).get();
        trans.setParent(this);
        // Pass the socket repository as early as possible...
        //
        trans.setSocketRepository(parentJob.getSocketRepository());
        if (parentJob.getJobMeta().isBatchIdPassed()) {
          trans.setPassedBatchId(parentJob.getPassedBatchId());
        }
        // set the parent job on the transformation, variables are taken from here...
        //
        trans.setParentJob(parentJob);
        trans.setParentVariableSpace(parentJob);
        trans.setLogLevel(transLogLevel);
        trans.setPreviousResult(previousResult);
        // NOTE(review): this passes the raw 'arguments' field while execute() below receives the
        // substituted 'args' - confirm that the difference is intended.
        trans.setArguments(arguments);
        // Mappings need the repository to load from
        //
        trans.setRepository(rep);
        // inject the metaStore
        trans.setMetaStore(metaStore);
        // First get the root job
        //
        Job rootJob = parentJob;
        while (rootJob.getParentJob() != null) {
          rootJob = rootJob.getParentJob();
        }
        // Get the start and end-date from the root job...
        //
        trans.setJobStartDate(rootJob.getStartDate());
        trans.setJobEndDate(rootJob.getEndDate());
        //
        for (DelegationListener delegationListener : parentJob.getDelegationListeners()) {
          // TODO: copy some settings in the job execution configuration, not strictly needed
          // but the execution configuration information is useful in case of a job re-start
          //
          delegationListener.transformationDelegationStarted(trans, new TransExecutionConfiguration());
        }
        try {
          // Start execution...
          //
          trans.execute(args);
          // TODO is it possible to implement Observer pattern to avoid Thread.sleep here?
          while (!trans.isFinished() && trans.getErrors() == 0) {
            if (parentJob.isStopped()) {
              trans.stopAll();
              break;
            } else {
              try {
                Thread.sleep(0, 500);
              } catch (InterruptedException e) {
                // Ignore errors
              }
            }
          }
          trans.waitUntilFinished();
          if (parentJob.isStopped() || trans.getErrors() != 0) {
            trans.stopAll();
            result.setNrErrors(1);
          }
          Result newResult = trans.getResult();
          // clear only the numbers, NOT the files or rows.
          result.clear();
          result.add(newResult);
          // Set the result rows too, if any ...
          if (!Utils.isEmpty(newResult.getRows())) {
            result.setRows(newResult.getRows());
          }
          if (setLogfile) {
            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG,
                KettleVFS.getFileObject(realLogFilename, this), parentJob.getJobname(), toString());
            result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
          }
        } catch (KettleException e) {
          logError(BaseMessages.getString(PKG, "JobTrans.Error.UnablePrepareExec"), e);
          result.setNrErrors(1);
        }
      }
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "JobTrans.ErrorUnableOpenTrans", e.getMessage()));
      logError(Const.getStackTracker(e));
      result.setNrErrors(1);
    }
    iteration++;
  }
  if (setLogfile) {
    if (logChannelFileWriter != null) {
      logChannelFileWriter.stopLogging();
      ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, logChannelFileWriter.getLogFile(),
          parentJob.getJobname(), getName());
      result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
      // See if anything went wrong during file writing...
      //
      if (logChannelFileWriter.getException() != null) {
        logError("Unable to open log file [" + getLogFilename() + "] : ");
        logError(Const.getStackTracker(logChannelFileWriter.getException()));
        result.setNrErrors(1);
        result.setResult(false);
        return result;
      }
    }
  }
  if (result.getNrErrors() == 0) {
    result.setResult(true);
  } else {
    result.setResult(false);
  }
  return result;
}
use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
the class MasterSlaveIT method runParallelFileReadOnSlavesWithPartitioning2.
/**
 * Reads a CSV file in parallel on all 3 slaves, each with 4 partitions.<br>
 * Variation of the plain partitioned read test: here 2 steps in sequence are clustered and partitioned.<br>
 * The data is then handed over to a dummy step on the slaves.<br>
 * Passes when monitoring reports zero errors and the output file content equals "100"
 * (ignoring whitespace and case).
 */
public void runParallelFileReadOnSlavesWithPartitioning2() throws Exception {
  final String ktrPath = "test/org/pentaho/di/cluster/test-parallel-file-read-on-slaves-with-partitioning2.ktr";
  final String outputPath = "${java.io.tmpdir}/test-parallel-file-read-on-slaves-with-partitioning2.txt";

  TransMeta clusteredMeta = loadTransMetaReplaceSlavesInCluster(clusterGenerator, ktrPath);
  TransExecutionConfiguration executionConfig = createClusteredTransExecutionConfiguration();
  TransSplitter splitter = Trans.executeClustered(clusteredMeta, executionConfig);

  LogChannel testLog = createLogChannel("cluster unit test <runParallelFileReadOnSlavesWithPartitioning2>");
  long errorCount = Trans.monitorClusteredTransformation(testLog, splitter, null, 1);
  assertEquals(0L, errorCount);

  String fileContent = loadFileContent(clusteredMeta, outputPath);
  assertEqualsIgnoreWhitespacesAndCase("100", fileContent);
}
use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
the class MasterSlaveIT method runParallelFileReadOnSlavesWithPartitioning.
/**
 * Reads a CSV file in parallel on all 3 slaves, each with 4 partitions.<br>
 * The data is then handed over to a dummy step on the slaves.<br>
 * Passes when monitoring reports zero errors and the output file content equals "100"
 * (ignoring whitespace and case).
 */
public void runParallelFileReadOnSlavesWithPartitioning() throws Exception {
  final String ktrPath = "test/org/pentaho/di/cluster/test-parallel-file-read-on-slaves-with-partitioning.ktr";
  final String outputPath = "${java.io.tmpdir}/test-parallel-file-read-on-slaves-with-partitioning.txt";

  TransMeta clusteredMeta = loadTransMetaReplaceSlavesInCluster(clusterGenerator, ktrPath);
  TransExecutionConfiguration executionConfig = createClusteredTransExecutionConfiguration();
  TransSplitter splitter = Trans.executeClustered(clusteredMeta, executionConfig);

  LogChannel testLog = createLogChannel("cluster unit test <runParallelFileReadOnSlavesWithPartitioning>");
  long errorCount = Trans.monitorClusteredTransformation(testLog, splitter, null, 1);
  assertEquals(0L, errorCount);

  String fileContent = loadFileContent(clusteredMeta, outputPath);
  assertEqualsIgnoreWhitespacesAndCase("100", fileContent);
}
use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
the class MasterSlaveIT method runOneStepClustered.
/**
 * Generates rows on the master, generates random values in a clustered step and brings them back to the
 * master.<br>
 * See also: PDI-6324 : Generate Rows to a clustered step ceases to work<br>
 * Passes when monitoring reports zero errors and the output file content equals "10000"
 * (ignoring whitespace and case).
 */
public void runOneStepClustered() throws Exception {
  final String ktrPath = "test/org/pentaho/di/cluster/one-step-clustered.ktr";
  final String outputPath = "${java.io.tmpdir}/one-step-clustered.txt";

  TransMeta clusteredMeta = loadTransMetaReplaceSlavesInCluster(clusterGenerator, ktrPath);
  TransExecutionConfiguration executionConfig = createClusteredTransExecutionConfiguration();
  TransSplitter splitter = Trans.executeClustered(clusteredMeta, executionConfig);

  LogChannel testLog = createLogChannel("cluster unit test <runOneStepClustered>");
  long errorCount = Trans.monitorClusteredTransformation(testLog, splitter, null, 1);
  assertEquals(0L, errorCount);

  String fileContent = loadFileContent(clusteredMeta, outputPath);
  assertEqualsIgnoreWhitespacesAndCase("10000", fileContent);
}
use of org.pentaho.di.trans.cluster.TransSplitter in project pentaho-kettle by pentaho.
the class MasterSlaveIT method runSubtransformationClustered.
/**
 * Checks that rows are passed to a sub-transformation executed on the cluster.
 * Sample rows are supplied to the run as the previous result of the execution configuration.
 * See PDI-10704 for details.
 * Passes when monitoring reports zero errors and the output file content equals "10"
 * (ignoring whitespace and case).
 *
 * @throws Exception if any of the helper or cluster calls fails
 */
public void runSubtransformationClustered() throws Exception {
  final String ktrPath = "test/org/pentaho/di/cluster/test-subtrans-clustered.ktr";
  final String outputPath = "${java.io.tmpdir}/test-subtrans-clustered.txt";

  TransMeta clusteredMeta = loadTransMetaReplaceSlavesInCluster(clusterGenerator, ktrPath);
  TransExecutionConfiguration executionConfig = createClusteredTransExecutionConfiguration();

  // Seed the run with sample rows through the "previous result" of the configuration.
  Result seededResult = new Result();
  seededResult.setRows(getSampleRows());
  executionConfig.setPreviousResult(seededResult);

  TransSplitter splitter = Trans.executeClustered(clusteredMeta, executionConfig);

  LogChannel testLog = createLogChannel("cluster unit test <runSubtransformationClustered>");
  long errorCount = Trans.monitorClusteredTransformation(testLog, splitter, null, 1);
  assertEquals(0L, errorCount);

  String fileContent = loadFileContent(clusteredMeta, outputPath);
  assertEqualsIgnoreWhitespacesAndCase("10", fileContent);
}
Aggregations