use of org.pentaho.di.trans.SlaveStepCopyPartitionDistribution in project pentaho-kettle by pentaho.
the class BaseStep method specialPartitioning.
private void specialPartitioning(RowMetaInterface rowMeta, Object[] row) throws KettleStepException {
if (nextStepPartitioningMeta == null) {
// Look up the partitioning of the next step.
// This is the case for non-clustered partitioning...
//
List<StepMeta> nextSteps = transMeta.findNextSteps(stepMeta);
if (nextSteps.size() > 0) {
nextStepPartitioningMeta = nextSteps.get(0).getStepPartitioningMeta();
}
// TODO: throw exception if we're not partitioning yet.
// For now it throws a NP Exception.
}
int partitionNr;
try {
partitionNr = nextStepPartitioningMeta.getPartition(rowMeta, row);
} catch (KettleException e) {
throw new KettleStepException("Unable to convert a value to integer while calculating the partition number", e);
}
RowSet selectedRowSet = null;
if (clusteredPartitioningFirst) {
clusteredPartitioningFirst = false;
// We are only running remotely if both the distribution is there AND if it actually
// contains something.
//
clusteredPartitioning = transMeta.getSlaveStepCopyPartitionDistribution() != null && !transMeta.getSlaveStepCopyPartitionDistribution().getDistribution().isEmpty();
}
// Clustered partitioning: route the row using the pre-calculated slave-step-copy distribution.
//
if (clusteredPartitioning) {
// Build the partition-number-to-rowset mapping once, on the first row.
//
if (partitionNrRowSetList == null) {
partitionNrRowSetList = new RowSet[outputRowSets.size()];
// The distribution is calculated during transformation split
// The slave-step-copy distribution is passed onto the slave transformation
//
SlaveStepCopyPartitionDistribution distribution = transMeta.getSlaveStepCopyPartitionDistribution();
String nextPartitionSchemaName = TransSplitter.createPartitionSchemaNameFromTarget(nextStepPartitioningMeta.getPartitionSchema().getName());
for (RowSet outputRowSet : outputRowSets) {
try {
// Look at the pre-determined distribution, decided at "transformation split" time.
//
int partNr = distribution.getPartition(outputRowSet.getRemoteSlaveServerName(), nextPartitionSchemaName, outputRowSet.getDestinationStepCopy());
if (partNr < 0) {
throw new KettleStepException("Unable to find partition using rowset data, slave=" + outputRowSet.getRemoteSlaveServerName() + ", partition schema=" + nextStepPartitioningMeta.getPartitionSchema().getName() + ", copy=" + outputRowSet.getDestinationStepCopy());
}
partitionNrRowSetList[partNr] = outputRowSet;
} catch (NullPointerException e) {
throw (e);
}
}
}
// Now pick the target row set for the calculated partition number.
//
if (partitionNr < partitionNrRowSetList.length) {
selectedRowSet = partitionNrRowSetList[partitionNr];
} else {
String rowsets = "";
for (RowSet rowSet : partitionNrRowSetList) {
rowsets += "[" + rowSet.toString() + "] ";
}
throw new KettleStepException("Internal error: the referenced partition nr '" + partitionNr + "' is higher than the maximum of '" + (partitionNrRowSetList.length - 1) + ". The available row sets are: {" + rowsets + "}");
}
if (selectedRowSet == null) {
logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
} else {
// Wait
putRowToRowSet(selectedRowSet, rowMeta, row);
incrementLinesWritten();
if (log.isRowLevel()) {
try {
logRowlevel("Partitioned #" + partitionNr + " to " + selectedRowSet + ", row=" + rowMeta.getString(row));
} catch (KettleValueException e) {
throw new KettleStepException(e);
}
}
}
} else {
// Local partitioning...
// Put the row forward to the next step according to the partition rule.
//
// Number of partitions defined by the next step's partitioner
int partCount = ((BasePartitioner) nextStepPartitioningMeta.getPartitioner()).getNrPartitions();
for (int i = 0; i < nextSteps.length; i++) {
selectedRowSet = outputRowSets.get(partitionNr + i * partCount);
if (selectedRowSet == null) {
logBasic(BaseMessages.getString(PKG, "BaseStep.TargetRowsetIsNotAvailable", partitionNr));
} else {
// Wait
putRowToRowSet(selectedRowSet, rowMeta, row);
incrementLinesWritten();
if (log.isRowLevel()) {
try {
logRowlevel(BaseMessages.getString(PKG, "BaseStep.PartitionedToRow", partitionNr, selectedRowSet, rowMeta.getString(row)));
} catch (KettleValueException e) {
throw new KettleStepException(e);
}
}
}
}
}
}
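The clustered branch above hinges on SlaveStepCopyPartitionDistribution.getPartition(slaveServerName, partitionSchemaName, stepCopyNr), which returns the partition number assigned to that combination at transformation-split time, or a negative value when no entry matches (hence the partNr < 0 check). Below is a minimal illustrative sketch of that lookup, not code from the repository: the class name, slave names and schema names are made up, and the exact numbers printed depend on how the class assigns partition numbers to the entries added with addPartition().
public class DistributionLookupSketch {
public static void main(String[] args) {
org.pentaho.di.trans.SlaveStepCopyPartitionDistribution distribution = new org.pentaho.di.trans.SlaveStepCopyPartitionDistribution();
// One entry per (slave server, partition schema, step copy) combination, as built at transformation-split time.
distribution.addPartition("slave-1", "SchemaA (slave-1)", 0);
distribution.addPartition("slave-1", "SchemaA (slave-1)", 1);
distribution.addPartition("slave-2", "SchemaA (slave-2)", 0);
// A known combination resolves to a non-negative partition number...
System.out.println(distribution.getPartition("slave-1", "SchemaA (slave-1)", 1));
// ...while an unknown one comes back negative, which specialPartitioning() treats as an error.
System.out.println(distribution.getPartition("slave-3", "SchemaA (slave-3)", 0));
}
}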
use of org.pentaho.di.trans.SlaveStepCopyPartitionDistribution in project pentaho-kettle by pentaho.
the class BaseStep method init.
/*
* (non-Javadoc)
*
* @see org.pentaho.di.trans.step.StepInterface#init(org.pentaho.di.trans.step.StepMetaInterface,
* org.pentaho.di.trans.step.StepDataInterface)
*/
@Override
public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
sdi.setStatus(StepExecutionStatus.STATUS_INIT);
String slaveNr = transMeta.getVariable(Const.INTERNAL_VARIABLE_SLAVE_SERVER_NUMBER);
String clusterSize = transMeta.getVariable(Const.INTERNAL_VARIABLE_CLUSTER_SIZE);
boolean master = "Y".equalsIgnoreCase(transMeta.getVariable(Const.INTERNAL_VARIABLE_CLUSTER_MASTER));
if (!Utils.isEmpty(slaveNr) && !Utils.isEmpty(clusterSize) && !master) {
this.slaveNr = Integer.parseInt(slaveNr);
this.clusterSize = Integer.parseInt(clusterSize);
if (log.isDetailed()) {
logDetailed(BaseMessages.getString(PKG, "BaseStep.Log.ReleasedServerSocketOnPort", slaveNr, clusterSize));
}
} else {
this.slaveNr = 0;
this.clusterSize = 0;
}
// Also set the internal variable for the partition
//
SlaveStepCopyPartitionDistribution partitionDistribution = transMeta.getSlaveStepCopyPartitionDistribution();
if (stepMeta.isPartitioned()) {
// Clustered run: look up this step copy's partition in the slave-step-copy distribution...
//
if (partitionDistribution != null && !partitionDistribution.getDistribution().isEmpty()) {
String slaveServerName = getVariable(Const.INTERNAL_VARIABLE_SLAVE_SERVER_NAME);
int stepCopyNr = stepcopy;
// Look up the partition nr...
// Set the partition ID (string) as well as the partition nr [0..size[
//
PartitionSchema partitionSchema = stepMeta.getStepPartitioningMeta().getPartitionSchema();
int partitionNr = partitionDistribution.getPartition(slaveServerName, partitionSchema.getName(), stepCopyNr);
if (partitionNr >= 0) {
String partitionNrString = new DecimalFormat("000").format(partitionNr);
setVariable(Const.INTERNAL_VARIABLE_STEP_PARTITION_NR, partitionNrString);
if (partitionDistribution.getOriginalPartitionSchemas() != null) {
// What is the partition schema name?
//
String partitionSchemaName = stepMeta.getStepPartitioningMeta().getPartitionSchema().getName();
// Find the original (pre-split) schema that this slave partition schema was derived from...
//
for (PartitionSchema originalPartitionSchema : partitionDistribution.getOriginalPartitionSchemas()) {
String slavePartitionSchemaName = TransSplitter.createSlavePartitionSchemaName(originalPartitionSchema.getName());
if (slavePartitionSchemaName.equals(partitionSchemaName)) {
PartitionSchema schema = (PartitionSchema) originalPartitionSchema.clone();
// Expand dynamically defined partition schemas to the size of the cluster first...
//
if (schema.isDynamicallyDefined()) {
schema.expandPartitionsDynamically(this.clusterSize, this);
}
String partID = schema.getPartitionIDs().get(partitionNr);
setVariable(Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partID);
break;
}
}
}
}
} else {
// This is a locally partitioned step...
//
int partitionNr = stepcopy;
String partitionNrString = new DecimalFormat("000").format(partitionNr);
setVariable(Const.INTERNAL_VARIABLE_STEP_PARTITION_NR, partitionNrString);
final List<String> partitionIDList = stepMeta.getStepPartitioningMeta().getPartitionSchema().getPartitionIDs();
if (partitionIDList.size() > 0) {
String partitionID = partitionIDList.get(partitionNr);
setVariable(Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partitionID);
} else {
logError(BaseMessages.getString(PKG, "BaseStep.Log.UnableToRetrievePartitionId", stepMeta.getStepPartitioningMeta().getPartitionSchema().getName()));
return false;
}
}
} else if (!Utils.isEmpty(partitionID)) {
setVariable(Const.INTERNAL_VARIABLE_STEP_PARTITION_ID, partitionID);
}
// Set a unique step number across all slave servers
//
// slaveNr * nrCopies + copyNr
//
uniqueStepNrAcrossSlaves = this.slaveNr * getStepMeta().getCopies() + stepcopy;
uniqueStepCountAcrossSlaves = this.clusterSize <= 1 ? getStepMeta().getCopies() : this.clusterSize * getStepMeta().getCopies();
if (uniqueStepCountAcrossSlaves == 0) {
uniqueStepCountAcrossSlaves = 1;
}
setVariable(Const.INTERNAL_VARIABLE_STEP_UNIQUE_NUMBER, Integer.toString(uniqueStepNrAcrossSlaves));
setVariable(Const.INTERNAL_VARIABLE_STEP_UNIQUE_COUNT, Integer.toString(uniqueStepCountAcrossSlaves));
setVariable(Const.INTERNAL_VARIABLE_STEP_COPYNR, Integer.toString(stepcopy));
// BACKLOG-18004
allowEmptyFieldNamesAndTypes = Boolean.parseBoolean(System.getProperties().getProperty(Const.KETTLE_ALLOW_EMPTY_FIELD_NAMES_AND_TYPES, "false"));
// Open server sockets for the remote output steps served by this step copy...
//
try {
// If this is on the master, separate logic applies.
//
// boolean isMaster = "Y".equalsIgnoreCase(getVariable(Const.INTERNAL_VARIABLE_CLUSTER_MASTER));
remoteOutputSteps = new ArrayList<RemoteStep>();
for (int i = 0; i < stepMeta.getRemoteOutputSteps().size(); i++) {
RemoteStep remoteStep = stepMeta.getRemoteOutputSteps().get(i);
// Only handle the remote output steps that originate from this step copy...
//
if (getCopy() == remoteStep.getSourceStepCopyNr()) {
// Open a server socket to allow the remote output step to connect.
//
RemoteStep copy = (RemoteStep) remoteStep.clone();
try {
if (log.isDetailed()) {
logDetailed(BaseMessages.getString(PKG, "BaseStep.Log.SelectedRemoteOutputStepToServer", copy, copy.getTargetStep(), copy.getTargetStepCopyNr(), copy.getPort()));
}
copy.openServerSocket(this);
if (log.isDetailed()) {
logDetailed(BaseMessages.getString(PKG, "BaseStep.Log.OpenedServerSocketConnectionTo", copy));
}
} catch (Exception e) {
logError("Unable to open server socket during step initialisation: " + copy.toString(), e);
throw e;
}
remoteOutputSteps.add(copy);
}
}
} catch (Exception e) {
for (RemoteStep remoteStep : remoteOutputSteps) {
if (remoteStep.getServerSocket() != null) {
try {
ServerSocket serverSocket = remoteStep.getServerSocket();
getTrans().getSocketRepository().releaseSocket(serverSocket.getLocalPort());
} catch (IOException e1) {
logError("Unable to close server socket after error during step initialisation", e);
}
}
}
return false;
}
// Now prepare the remote input steps to read from...
//
try {
remoteInputSteps = new ArrayList<RemoteStep>();
if ((stepMeta.isPartitioned() && getClusterSize() > 1) || stepMeta.getCopies() > 1) {
// Partitioned clustered run or multiple copies: only take the remote input steps that target this copy...
//
for (int i = 0; i < stepMeta.getRemoteInputSteps().size(); i++) {
RemoteStep remoteStep = stepMeta.getRemoteInputSteps().get(i);
if (remoteStep.getTargetStepCopyNr() == stepcopy) {
RemoteStep copy = (RemoteStep) remoteStep.clone();
remoteInputSteps.add(copy);
}
}
} else {
for (RemoteStep remoteStep : stepMeta.getRemoteInputSteps()) {
RemoteStep copy = (RemoteStep) remoteStep.clone();
remoteInputSteps.add(copy);
}
}
} catch (Exception e) {
logError("Unable to initialize remote input steps during step initialisation", e);
return false;
}
// Getting and setting the error handling values
// First, get the step error meta
StepErrorMeta stepErrorMeta = stepMeta.getStepErrorMeta();
if (stepErrorMeta != null) {
// Do an environment substitution for stepErrorMeta.getMaxErrors(), stepErrorMeta.getMinPercentRows()
// and stepErrorMeta.getMaxPercentErrors().
// Catch NumberFormatException since the user can enter anything in the dialog - the value
// they enter must be a number or a variable set to a number.
// We will use a boolean to indicate failure so that we can log all errors - not just the first one caught.
boolean envSubFailed = false;
try {
maxErrors = (!Utils.isEmpty(stepErrorMeta.getMaxErrors()) ? Long.valueOf(trans.environmentSubstitute(stepErrorMeta.getMaxErrors())) : -1L);
} catch (NumberFormatException nfe) {
log.logError(BaseMessages.getString(PKG, "BaseStep.Log.NumberFormatException", BaseMessages.getString(PKG, "BaseStep.Property.MaxErrors.Name"), this.stepname, (stepErrorMeta.getMaxErrors() != null ? stepErrorMeta.getMaxErrors() : "")));
envSubFailed = true;
}
try {
minRowsForMaxErrorPercent = (!Utils.isEmpty(stepErrorMeta.getMinPercentRows()) ? Long.valueOf(trans.environmentSubstitute(stepErrorMeta.getMinPercentRows())) : -1L);
} catch (NumberFormatException nfe) {
log.logError(BaseMessages.getString(PKG, "BaseStep.Log.NumberFormatException", BaseMessages.getString(PKG, "BaseStep.Property.MinRowsForErrorsPercentCalc.Name"), this.stepname, (stepErrorMeta.getMinPercentRows() != null ? stepErrorMeta.getMinPercentRows() : "")));
envSubFailed = true;
}
try {
maxPercentErrors = (!Utils.isEmpty(stepErrorMeta.getMaxPercentErrors()) ? Integer.valueOf(trans.environmentSubstitute(stepErrorMeta.getMaxPercentErrors())) : -1);
} catch (NumberFormatException nfe) {
log.logError(BaseMessages.getString(PKG, "BaseStep.Log.NumberFormatException", BaseMessages.getString(PKG, "BaseStep.Property.MaxPercentErrors.Name"), this.stepname, (stepErrorMeta.getMaxPercentErrors() != null ? stepErrorMeta.getMaxPercentErrors() : "")));
envSubFailed = true;
}
// If any environment substitution failed, fail the initialization
if (envSubFailed) {
return false;
}
}
return true;
}
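The unique step numbering near the end of init() is plain arithmetic: uniqueStepNrAcrossSlaves = slaveNr * copies + copyNr, and uniqueStepCountAcrossSlaves is copies when running locally (clusterSize <= 1) or clusterSize * copies when clustered. The following is a small standalone sketch with hypothetical numbers, not repository code, that prints the resulting numbering for a three-slave cluster running two copies of the step.
public class UniqueStepNumberSketch {
public static void main(String[] args) {
int clusterSize = 3; // hypothetical: three slave servers
int copies = 2; // hypothetical: two copies of the step on each slave
// Every copy on every slave gets a distinct number in [0 .. clusterSize * copies - 1].
for (int slaveNr = 0; slaveNr < clusterSize; slaveNr++) {
for (int copyNr = 0; copyNr < copies; copyNr++) {
int uniqueStepNr = slaveNr * copies + copyNr;
System.out.println("slave " + slaveNr + ", copy " + copyNr + " -> unique step nr " + uniqueStepNr);
}
}
// uniqueStepCountAcrossSlaves would be clusterSize * copies = 6 here, so
// INTERNAL_VARIABLE_STEP_UNIQUE_NUMBER ranges over 0..5 and
// INTERNAL_VARIABLE_STEP_UNIQUE_COUNT is "6" on every copy.
System.out.println("unique step count = " + (clusterSize <= 1 ? copies : clusterSize * copies));
}
}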
use of org.pentaho.di.trans.SlaveStepCopyPartitionDistribution in project pentaho-kettle by pentaho.
the class RepositoryTestBase method createTransMeta.
protected TransMeta createTransMeta(final String dbName) throws Exception {
RepositoryDirectoryInterface rootDir = loadStartDirectory();
TransMeta transMeta = new TransMeta();
transMeta.setName(EXP_TRANS_NAME.concat(dbName));
transMeta.setDescription(EXP_TRANS_DESC);
transMeta.setExtendedDescription(EXP_TRANS_EXTENDED_DESC);
transMeta.setRepositoryDirectory(rootDir.findDirectory(DIR_TRANSFORMATIONS));
transMeta.setTransversion(EXP_TRANS_VERSION);
transMeta.setTransstatus(EXP_TRANS_STATUS);
transMeta.setCreatedUser(EXP_TRANS_CREATED_USER);
transMeta.setCreatedDate(EXP_TRANS_CREATED_DATE);
transMeta.setModifiedUser(EXP_TRANS_MOD_USER);
transMeta.setModifiedDate(EXP_TRANS_MOD_DATE);
transMeta.addParameterDefinition(EXP_TRANS_PARAM_1_NAME, EXP_TRANS_PARAM_1_DEF, EXP_TRANS_PARAM_1_DESC);
// TODO mlowery other transLogTable fields could be set for testing here
TransLogTable transLogTable = TransLogTable.getDefault(transMeta, transMeta, new ArrayList<StepMeta>(0));
transLogTable.setConnectionName(EXP_TRANS_LOG_TABLE_CONN_NAME);
transLogTable.setLogInterval(EXP_TRANS_LOG_TABLE_INTERVAL);
transLogTable.setSchemaName(EXP_TRANS_LOG_TABLE_SCHEMA_NAME);
transLogTable.setLogSizeLimit(EXP_TRANS_LOG_TABLE_SIZE_LIMIT);
transLogTable.setTableName(EXP_TRANS_LOG_TABLE_TABLE_NAME);
transLogTable.setTimeoutInDays(EXP_TRANS_LOG_TABLE_TIMEOUT_IN_DAYS);
transMeta.setTransLogTable(transLogTable);
// TODO mlowery other perfLogTable fields could be set for testing here
PerformanceLogTable perfLogTable = PerformanceLogTable.getDefault(transMeta, transMeta);
perfLogTable.setConnectionName(EXP_TRANS_LOG_TABLE_CONN_NAME);
perfLogTable.setLogInterval(EXP_TRANS_LOG_TABLE_INTERVAL);
perfLogTable.setSchemaName(EXP_TRANS_LOG_TABLE_SCHEMA_NAME);
perfLogTable.setTableName(EXP_TRANS_LOG_TABLE_TABLE_NAME);
perfLogTable.setTimeoutInDays(EXP_TRANS_LOG_TABLE_TIMEOUT_IN_DAYS);
transMeta.setPerformanceLogTable(perfLogTable);
// TODO mlowery other channelLogTable fields could be set for testing here
ChannelLogTable channelLogTable = ChannelLogTable.getDefault(transMeta, transMeta);
channelLogTable.setConnectionName(EXP_TRANS_LOG_TABLE_CONN_NAME);
channelLogTable.setSchemaName(EXP_TRANS_LOG_TABLE_SCHEMA_NAME);
channelLogTable.setTableName(EXP_TRANS_LOG_TABLE_TABLE_NAME);
channelLogTable.setTimeoutInDays(EXP_TRANS_LOG_TABLE_TIMEOUT_IN_DAYS);
transMeta.setChannelLogTable(channelLogTable);
// TODO mlowery other stepLogTable fields could be set for testing here
StepLogTable stepLogTable = StepLogTable.getDefault(transMeta, transMeta);
stepLogTable.setConnectionName(EXP_TRANS_LOG_TABLE_CONN_NAME);
stepLogTable.setSchemaName(EXP_TRANS_LOG_TABLE_SCHEMA_NAME);
stepLogTable.setTableName(EXP_TRANS_LOG_TABLE_TABLE_NAME);
stepLogTable.setTimeoutInDays(EXP_TRANS_LOG_TABLE_TIMEOUT_IN_DAYS);
transMeta.setStepLogTable(stepLogTable);
DatabaseMeta dbMeta = createDatabaseMeta(dbName);
// dbMeta must be saved so that it gets an ID
repository.save(dbMeta, VERSION_COMMENT_V1, null);
deleteStack.push(dbMeta);
transMeta.setMaxDateConnection(dbMeta);
transMeta.setMaxDateTable(EXP_TRANS_MAX_DATE_TABLE);
transMeta.setMaxDateField(EXP_TRANS_MAX_DATE_FIELD);
transMeta.setMaxDateOffset(EXP_TRANS_MAX_DATE_OFFSET);
transMeta.setMaxDateDifference(EXP_TRANS_MAX_DATE_DIFF);
transMeta.setSizeRowset(EXP_TRANS_SIZE_ROWSET);
transMeta.setSleepTimeEmpty(EXP_TRANS_SLEEP_TIME_EMPTY);
transMeta.setSleepTimeFull(EXP_TRANS_SLEEP_TIME_FULL);
transMeta.setUsingUniqueConnections(EXP_TRANS_USING_UNIQUE_CONN);
transMeta.setFeedbackShown(EXP_TRANS_FEEDBACK_SHOWN);
transMeta.setFeedbackSize(EXP_TRANS_FEEDBACK_SIZE);
transMeta.setUsingThreadPriorityManagment(EXP_TRANS_USING_THREAD_PRIORITY_MGMT);
transMeta.setSharedObjectsFile(EXP_TRANS_SHARED_OBJECTS_FILE);
transMeta.setCapturingStepPerformanceSnapShots(EXP_TRANS_CAPTURE_STEP_PERF_SNAPSHOTS);
transMeta.setStepPerformanceCapturingDelay(EXP_TRANS_STEP_PERF_CAP_DELAY);
transMeta.addDependency(new TransDependency(dbMeta, EXP_TRANS_DEP_TABLE_NAME, EXP_TRANS_DEP_FIELD_NAME));
DatabaseMeta stepDbMeta = createDatabaseMeta(EXP_DBMETA_NAME_STEP.concat(dbName));
repository.save(stepDbMeta, VERSION_COMMENT_V1, null);
deleteStack.push(stepDbMeta);
Condition cond = new Condition();
StepMeta step1 = createStepMeta1(transMeta, stepDbMeta, cond);
transMeta.addStep(step1);
StepMeta step2 = createStepMeta2(stepDbMeta, cond);
transMeta.addStep(step2);
transMeta.addTransHop(createTransHopMeta(step1, step2));
SlaveServer slaveServer = createSlaveServer(dbName);
PartitionSchema partSchema = createPartitionSchema(dbName);
// slaveServer, partSchema must be saved so that they get IDs
repository.save(slaveServer, VERSION_COMMENT_V1, null);
deleteStack.push(slaveServer);
repository.save(partSchema, VERSION_COMMENT_V1, null);
deleteStack.push(partSchema);
SlaveStepCopyPartitionDistribution slaveStepCopyPartitionDistribution = new SlaveStepCopyPartitionDistribution();
slaveStepCopyPartitionDistribution.addPartition(EXP_SLAVE_NAME, EXP_PART_SCHEMA_NAME, 0);
slaveStepCopyPartitionDistribution.setOriginalPartitionSchemas(Arrays.asList(new PartitionSchema[] { partSchema }));
transMeta.setSlaveStepCopyPartitionDistribution(slaveStepCopyPartitionDistribution);
transMeta.setSlaveTransformation(EXP_TRANS_SLAVE_TRANSFORMATION);
return transMeta;
}
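The distribution attached to the test transformation above holds a single entry plus the original partition schema. Below is a hedged sketch of how a test could verify that wiring before saving; it is not part of RepositoryTestBase. It reuses the EXP_SLAVE_NAME and EXP_PART_SCHEMA_NAME constants from this test base, assumes JUnit's assertTrue/assertEquals are statically imported, and assumes getPartition() returns a non-negative number for a known key and a negative one otherwise, matching the checks in the BaseStep code above.
// Hypothetical helper, not part of RepositoryTestBase.
protected void assertDistributionWiring(TransMeta transMeta) {
SlaveStepCopyPartitionDistribution dist = transMeta.getSlaveStepCopyPartitionDistribution();
// The entry added with addPartition(EXP_SLAVE_NAME, EXP_PART_SCHEMA_NAME, 0) resolves to a partition number...
assertTrue(dist.getPartition(EXP_SLAVE_NAME, EXP_PART_SCHEMA_NAME, 0) >= 0);
// ...while an unknown slave/schema/copy combination yields a negative result.
assertTrue(dist.getPartition("no-such-slave", EXP_PART_SCHEMA_NAME, 0) < 0);
// The original (pre-split) partition schemas travel along with the distribution.
assertEquals(1, dist.getOriginalPartitionSchemas().size());
}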